Adds ingest_legiscan.py to pull all US state + federal bills (2016-2026) from the LegiScan API into legiscan_sessions and legiscan_bills tables. Bills are keyword-tagged across 8 research categories (data_center, ratepayer_protection, large_load, grid_impact, tax_incentive, etc.). Loads ~1.3M bills; ~60K tagged relevant. Adds query_legiscan_bills.sql with pre-built analysis queries including state/DC joins. Updates database-tables.md, README.md, and research-ideas.md accordingly. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
218 lines
8.4 KiB
SQL
218 lines
8.4 KiB
SQL
-- ============================================================
|
|
-- LegiScan Legislative Analysis Queries
|
|
-- Database: data_centers Schema: public
|
|
-- ============================================================
|
|
--
|
|
-- SETUP
|
|
-- Populate the database first:
|
|
-- python ingest_legiscan.py --all
|
|
-- This downloads ~646 sessions (2016-2026, all US states + federal),
|
|
-- loads ~1.3M bills, and tags ~60K as relevant.
|
|
--
|
|
-- To refresh (weekly dataset updates from LegiScan):
|
|
-- python ingest_legiscan.py --fetch --load
|
|
-- Already-imported sessions with unchanged dataset_hash are skipped.
|
|
--
|
|
-- To retag after editing keyword lists in ingest_legiscan.py:
|
|
-- python ingest_legiscan.py --tag
|
|
--
|
|
-- RELEVANCE TAGS (stored in legiscan_bills.relevance_tags[]):
|
|
-- data_center - Bills naming data centers, hyperscale, colocation, AI campuses
|
|
-- large_load - Crypto mining, large industrial loads, extraordinary load
|
|
-- ratepayer_protection - Cost shifting, cross-subsidy, rate design, affordability
|
|
-- grid_impact - Grid reliability, transmission, interconnection queue
|
|
-- tax_incentive - Tax exemptions/abatements/credits for facilities
|
|
-- energy_policy - Renewable PPAs, green tariffs, clean electricity
|
|
-- water_use - Cooling water, evaporative cooling, water footprint
|
|
-- siting_permitting - Zoning, conditional use permits, local control
|
|
--
|
|
-- STATUS CODES (legiscan_bills.status):
|
|
-- 1=Introduced 2=Engrossed 3=Enrolled 4=Passed 5=Vetoed
|
|
-- 6=Failed 7=Override 8=Chaptered 9=Referred 12=Draft
|
|
-- ============================================================
|
|
|
|
-- ── Quick overview ──────────────────────────────────────────
-- Single-row summary of the loaded data: total bills, bills flagged
-- is_relevant, number of distinct state codes, and the range of session years.
-- Only bills whose session_id matches a row in legiscan_sessions are counted.

SELECT
    COUNT(*)                               AS total_bills,
    COUNT(*) FILTER (WHERE lb.is_relevant) AS relevant_bills,
    COUNT(DISTINCT lb.state)               AS states,
    MIN(ls.year_start)                     AS year_from,
    MAX(ls.year_end)                       AS year_to
FROM legiscan_sessions AS ls
INNER JOIN legiscan_bills AS lb
    ON lb.session_id = ls.session_id;
|
|
|
|
-- ── Bills per relevance tag ─────────────────────────────────
-- One row per tag. unnest() expands relevance_tags, so a bill carrying several
-- tags is counted under each of them; bills with a NULL or empty array
-- contribute no rows.
--   passed  = status 4 (Passed)
--   enacted = status 4 or 8 (Passed or Chaptered)
-- The set-returning function is joined with an explicit CROSS JOIN LATERAL
-- rather than an implicit comma join, and ties on bill_count are broken by
-- tag so the output order is stable between runs.

SELECT
    t.tag,
    COUNT(*)                                    AS bill_count,
    COUNT(*) FILTER (WHERE lb.status = 4)       AS passed,
    COUNT(*) FILTER (WHERE lb.status IN (4, 8)) AS enacted
FROM legiscan_bills AS lb
CROSS JOIN LATERAL unnest(lb.relevance_tags) AS t (tag)
GROUP BY t.tag
ORDER BY bill_count DESC, t.tag;
|
|
|
|
-- ── Top states for relevant legislation ────────────────────
-- The 20 states with the most bills flagged is_relevant, plus a per-tag
-- breakdown. A bill is counted once in relevant_bills and once in every tag
-- column whose tag appears in its relevance_tags array.

SELECT
    lb.state,
    COUNT(*)                                                               AS relevant_bills,
    COUNT(*) FILTER (WHERE 'data_center' = ANY(lb.relevance_tags))          AS data_center,
    COUNT(*) FILTER (WHERE 'large_load' = ANY(lb.relevance_tags))           AS large_load,
    COUNT(*) FILTER (WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)) AS ratepayer,
    COUNT(*) FILTER (WHERE 'tax_incentive' = ANY(lb.relevance_tags))        AS tax_incentive,
    COUNT(*) FILTER (WHERE 'grid_impact' = ANY(lb.relevance_tags))          AS grid_impact
FROM legiscan_bills AS lb
WHERE lb.is_relevant
GROUP BY lb.state
ORDER BY relevant_bills DESC
LIMIT 20;
|
|
|
|
-- ── Trend by year ───────────────────────────────────────────
-- Bill volume per session start year: all bills, relevant bills, relevant
-- bills with status 4 or 8, and the relevant share as a percentage rounded
-- to one decimal place. Bills are bucketed by their session's year_start.

WITH yearly AS (
    -- Raw counts per session start year.
    SELECT
        ls.year_start                                                              AS year,
        COUNT(lb.bill_id)                                                          AS total_bills,
        COUNT(lb.bill_id) FILTER (WHERE lb.is_relevant)                            AS relevant_bills,
        COUNT(lb.bill_id) FILTER (WHERE lb.is_relevant AND lb.status IN (4, 8))    AS enacted
    FROM legiscan_sessions AS ls
    INNER JOIN legiscan_bills AS lb
        ON lb.session_id = ls.session_id
    GROUP BY ls.year_start
)
SELECT
    y.year,
    y.total_bills,
    y.relevant_bills,
    y.enacted,
    -- NULLIF guards the division when a year has no countable bill_id values.
    ROUND(100.0 * y.relevant_bills / NULLIF(y.total_bills, 0), 1) AS pct_relevant
FROM yearly AS y
ORDER BY y.year;
|
|
|
|
-- ── Data center bills specifically ─────────────────────────
-- Every bill tagged 'data_center'. Sorted so status 4 comes first, then 8,
-- then 3, then everything else; within each group newest session year first,
-- then by state.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
WHERE 'data_center' = ANY(lb.relevance_tags)
ORDER BY
    CASE
        WHEN lb.status = 4 THEN 0
        WHEN lb.status = 8 THEN 1
        WHEN lb.status = 3 THEN 2
        ELSE 3
    END,
    ls.year_start DESC,
    lb.state;
|
|
|
|
-- ── Ratepayer protection bills ──────────────────────────────
-- Every bill tagged 'ratepayer_protection'. Sort priority by status is
-- 4, then 8, then 3, then all other values; ties are ordered by newest
-- session year and then state.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)
ORDER BY
    CASE
        WHEN lb.status = 4 THEN 0
        WHEN lb.status = 8 THEN 1
        WHEN lb.status = 3 THEN 2
        ELSE 3
    END,
    ls.year_start DESC,
    lb.state;
|
|
|
|
-- ── Bills at intersection of data center + ratepayer ───────
-- Bills whose relevance_tags contain BOTH 'data_center' and
-- 'ratepayer_protection', newest session year first, then by state.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)
  AND 'data_center' = ANY(lb.relevance_tags)
ORDER BY
    ls.year_start DESC,
    lb.state;
|
|
|
|
-- ── Large load + grid impact ────────────────────────────────
-- Bills whose relevance_tags contain BOTH 'large_load' and 'grid_impact',
-- newest session year first, then by state.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
WHERE 'grid_impact' = ANY(lb.relevance_tags)
  AND 'large_load' = ANY(lb.relevance_tags)
ORDER BY
    ls.year_start DESC,
    lb.state;
|
|
|
|
-- ── Tax incentive bills passed/enacted ─────────────────────
-- Bills tagged 'tax_incentive' whose status is 4 (Passed) or 8 (Chaptered),
-- newest session year first, then by state.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
WHERE lb.status IN (4, 8)
  AND 'tax_incentive' = ANY(lb.relevance_tags)
ORDER BY
    ls.year_start DESC,
    lb.state;
|
|
|
|
-- ── Join to data centers: data center counts vs. relevant legislation by state ──
-- One row per state present in master_data_centers, with the number of data
-- centers and the number of relevant bills (total, ratepayer-protection,
-- data-center-specific, and status 4/8). States that have data centers but no
-- relevant bills are kept and show 0 in the bill columns.
--
-- Each side is aggregated to one row per state BEFORE joining. Joining the raw
-- tables on state pairs every data center with every relevant bill in that
-- state and relies on COUNT(DISTINCT ...) to undo the duplication; the
-- pre-aggregated form returns the same counts without building that
-- intermediate product.
-- NOTE(review): assumes master_data_centers.state and legiscan_bills.state use
-- the same state code format - confirm.

WITH dc_by_state AS (
    -- Data centers per state.
    SELECT
        dc.state,
        COUNT(DISTINCT dc.id) AS data_centers
    FROM master_data_centers AS dc
    GROUP BY dc.state
),
bills_by_state AS (
    -- Relevant bills per state, with per-tag and status breakdowns.
    SELECT
        lb.state,
        COUNT(DISTINCT lb.bill_id) AS relevant_bills,
        COUNT(DISTINCT lb.bill_id)
            FILTER (WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)) AS ratepayer_bills,
        COUNT(DISTINCT lb.bill_id)
            FILTER (WHERE 'data_center' = ANY(lb.relevance_tags)) AS dc_specific_bills,
        COUNT(DISTINCT lb.bill_id)
            FILTER (WHERE lb.status IN (4, 8)) AS enacted_bills
    FROM legiscan_bills AS lb
    WHERE lb.is_relevant
    GROUP BY lb.state
)
SELECT
    d.state,
    d.data_centers,
    -- COALESCE turns "no matching bills row" into 0 rather than NULL.
    COALESCE(b.relevant_bills, 0)    AS relevant_bills,
    COALESCE(b.ratepayer_bills, 0)   AS ratepayer_bills,
    COALESCE(b.dc_specific_bills, 0) AS dc_specific_bills,
    COALESCE(b.enacted_bills, 0)     AS enacted_bills
FROM dc_by_state AS d
LEFT JOIN bills_by_state AS b
    ON b.state = d.state
ORDER BY relevant_bills DESC, d.state;
|
|
|
|
-- ── Full-text search: find bills mentioning specific terms ──
-- Searches title + description and returns the 50 best-ranked matches.
--
-- To search for other terms, edit the to_tsquery() string in the CTE below
-- (it is the only place the search terms appear). to_tsquery syntax:
--   a | b      either term
--   a & b      both terms
--   a <-> b    the phrase "a b" (b immediately after a; PostgreSQL 9.6+)
-- Multi-word phrases MUST be written with <-> : to_tsquery raises
-- "syntax error in tsquery" on whitespace-separated words, and double quotes
-- do not group words there (that syntax belongs to websearch_to_tsquery).

WITH search AS (
    SELECT to_tsquery('english', 'hyperscale | colocation | (data <-> center)') AS query
)
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.description,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
CROSS JOIN search AS s
WHERE to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, ''))
      @@ s.query
ORDER BY
    ts_rank(
        to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, '')),
        s.query
    ) DESC,
    lb.bill_id  -- tie-breaker so LIMIT returns the same rows on every run
LIMIT 50;
|
|
|
|
-- ── Session coverage check ──────────────────────────────────
-- Per state_abbr: how many sessions are loaded, the sum of their bill_count
-- values, and the earliest year_start / latest year_end among them.

SELECT
    ls.state_abbr,
    COUNT(*)           AS sessions_loaded,
    SUM(ls.bill_count) AS total_bills,
    MIN(ls.year_start) AS earliest,
    MAX(ls.year_end)   AS latest
FROM legiscan_sessions AS ls
GROUP BY ls.state_abbr
ORDER BY ls.state_abbr;
|