-- ============================================================
-- LegiScan Legislative Analysis Queries
-- Database: data_centers   Schema: public
-- ============================================================
--
-- SETUP
--   Populate the database first:
--     python ingest_legiscan.py --all
--   This downloads ~646 sessions (2016-2026, all US states + federal),
--   loads ~1.3M bills, and tags ~60K as relevant.
--
--   To refresh (weekly dataset updates from LegiScan):
--     python ingest_legiscan.py --fetch --load
--   Already-imported sessions with unchanged dataset_hash are skipped.
--
--   To retag after editing keyword lists in ingest_legiscan.py:
--     python ingest_legiscan.py --tag
--
-- RELEVANCE TAGS (stored in legiscan_bills.relevance_tags[]):
--   data_center          - Bills naming data centers, hyperscale, colocation, AI campuses
--   large_load           - Crypto mining, large industrial loads, extraordinary load
--   ratepayer_protection - Cost shifting, cross-subsidy, rate design, affordability
--   grid_impact          - Grid reliability, transmission, interconnection queue
--   tax_incentive        - Tax exemptions/abatements/credits for facilities
--   energy_policy        - Renewable PPAs, green tariffs, clean electricity
--   water_use            - Cooling water, evaporative cooling, water footprint
--   siting_permitting    - Zoning, conditional use permits, local control
--
-- STATUS CODES (legiscan_bills.status):
--   1=Introduced  2=Engrossed  3=Enrolled  4=Passed  5=Vetoed
--   6=Failed      7=Override   8=Chaptered 9=Referred 12=Draft
--
-- Throughout this file "enacted" means status IN (4, 8), i.e. Passed or
-- Chaptered, and "passed" means status = 4 only.
-- ============================================================


-- ── Quick overview ──────────────────────────────────────────
-- One-row summary of everything loaded: bill totals, how many were tagged
-- relevant, how many distinct states appear, and the span of session years.
SELECT
    COUNT(*)                                  AS total_bills,
    COUNT(*) FILTER (WHERE lb.is_relevant)    AS relevant_bills,
    COUNT(DISTINCT lb.state)                  AS states,
    MIN(ls.year_start)                        AS year_from,
    MAX(ls.year_end)                          AS year_to
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id);


-- ── Bills per relevance tag ─────────────────────────────────
-- A bill carrying several tags is counted once under each tag, so the
-- bill_count column does not sum to the number of relevant bills.
SELECT
    t.tag,
    COUNT(*)                                        AS bill_count,
    COUNT(*) FILTER (WHERE lb.status = 4)           AS passed,
    COUNT(*) FILTER (WHERE lb.status IN (4, 8))     AS enacted
FROM legiscan_bills AS lb
CROSS JOIN LATERAL unnest(lb.relevance_tags) AS t (tag)
GROUP BY t.tag
ORDER BY bill_count DESC;


-- ── Top states for relevant legislation ────────────────────
-- Top 20 states by relevant-bill count with a per-tag breakdown.
-- state is a secondary sort key so the LIMIT cut-off is deterministic on ties.
SELECT
    lb.state,
    COUNT(*)                                                                    AS relevant_bills,
    COUNT(*) FILTER (WHERE 'data_center'          = ANY(lb.relevance_tags))     AS data_center,
    COUNT(*) FILTER (WHERE 'large_load'           = ANY(lb.relevance_tags))     AS large_load,
    COUNT(*) FILTER (WHERE 'ratepayer_protection' = ANY(lb.relevance_tags))     AS ratepayer,
    COUNT(*) FILTER (WHERE 'tax_incentive'        = ANY(lb.relevance_tags))     AS tax_incentive,
    COUNT(*) FILTER (WHERE 'grid_impact'          = ANY(lb.relevance_tags))     AS grid_impact
FROM legiscan_bills AS lb
WHERE lb.is_relevant
GROUP BY lb.state
ORDER BY relevant_bills DESC, lb.state
LIMIT 20;


-- ── Trend by year ───────────────────────────────────────────
-- Bills are bucketed by the start year of their session.
-- NULLIF guards the percentage against division by zero.
SELECT
    ls.year_start                                                               AS year,
    COUNT(lb.bill_id)                                                           AS total_bills,
    COUNT(lb.bill_id) FILTER (WHERE lb.is_relevant)                             AS relevant_bills,
    COUNT(lb.bill_id) FILTER (WHERE lb.is_relevant AND lb.status IN (4, 8))     AS enacted,
    ROUND(
        100.0 * COUNT(lb.bill_id) FILTER (WHERE lb.is_relevant)
              / NULLIF(COUNT(lb.bill_id), 0),
        1
    )                                                                           AS pct_relevant
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
GROUP BY ls.year_start
ORDER BY ls.year_start;


-- ── Data center bills specifically ─────────────────────────
-- Sorted so Passed (4) come first, then Chaptered (8), then Enrolled (3),
-- then everything else; newest sessions first within each group.
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
WHERE 'data_center' = ANY(lb.relevance_tags)
ORDER BY
    CASE lb.status
        WHEN 4 THEN 0
        WHEN 8 THEN 1
        WHEN 3 THEN 2
        ELSE 3
    END,
    ls.year_start DESC,
    lb.state;


-- ── Ratepayer protection bills ──────────────────────────────
-- Same column list and status-priority ordering as the data center query.
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)
ORDER BY
    CASE lb.status
        WHEN 4 THEN 0
        WHEN 8 THEN 1
        WHEN 3 THEN 2
        ELSE 3
    END,
    ls.year_start DESC,
    lb.state;


-- ── Bills at intersection of data center + ratepayer ───────
-- Bills that carry BOTH tags.
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
WHERE 'data_center' = ANY(lb.relevance_tags)
  AND 'ratepayer_protection' = ANY(lb.relevance_tags)
ORDER BY ls.year_start DESC, lb.state;


-- ── Large load + grid impact ────────────────────────────────
-- Bills that carry BOTH tags.
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.relevance_tags,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
WHERE 'large_load' = ANY(lb.relevance_tags)
  AND 'grid_impact' = ANY(lb.relevance_tags)
ORDER BY ls.year_start DESC, lb.state;


-- ── Tax incentive bills passed/enacted ─────────────────────
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
WHERE 'tax_incentive' = ANY(lb.relevance_tags)
  AND lb.status IN (4, 8)  -- Passed or Chaptered
ORDER BY ls.year_start DESC, lb.state;


-- ── Join to data centers: states with DCs and their relevant legislation ──
-- Every state present in master_data_centers is returned; states with no
-- relevant bills show zeros (the bill side is LEFT JOINed).
--
-- Each side is aggregated per state BEFORE joining. Joining the raw tables on
-- state alone would build a (data centers x bills) cross product per state and
-- rely on COUNT(DISTINCT ...) to undo it.
WITH dc_by_state AS (
    -- Number of data centers per state.
    SELECT
        dc.state,
        COUNT(DISTINCT dc.id) AS data_centers
    FROM master_data_centers AS dc
    GROUP BY dc.state
),
bills_by_state AS (
    -- Relevant-bill counts per state, broken out by tag and enacted status.
    SELECT
        lb.state,
        COUNT(DISTINCT lb.bill_id)                                                              AS relevant_bills,
        COUNT(DISTINCT lb.bill_id) FILTER (WHERE 'ratepayer_protection' = ANY(lb.relevance_tags)) AS ratepayer_bills,
        COUNT(DISTINCT lb.bill_id) FILTER (WHERE 'data_center' = ANY(lb.relevance_tags))          AS dc_specific_bills,
        COUNT(DISTINCT lb.bill_id) FILTER (WHERE lb.status IN (4, 8))                             AS enacted_bills
    FROM legiscan_bills AS lb
    WHERE lb.is_relevant
    GROUP BY lb.state
)
SELECT
    d.state,
    d.data_centers,
    COALESCE(b.relevant_bills, 0)    AS relevant_bills,
    COALESCE(b.ratepayer_bills, 0)   AS ratepayer_bills,
    COALESCE(b.dc_specific_bills, 0) AS dc_specific_bills,
    COALESCE(b.enacted_bills, 0)     AS enacted_bills
FROM dc_by_state AS d
LEFT JOIN bills_by_state AS b
    ON b.state = d.state
ORDER BY relevant_bills DESC, d.state;


-- ── Full-text search: find bills mentioning specific terms ──
-- Edit the to_tsquery() string below to search for other terms. It is built
-- once in FROM and reused by both the filter and the ranking.
--
-- to_tsquery syntax: | = OR, & = AND, ! = NOT, <-> = "followed by" (phrase).
-- Double quotes are NOT phrase delimiters in to_tsquery; a multi-word phrase
-- must be written with <->, e.g. (data <-> center).
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.description,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls USING (session_id)
CROSS JOIN to_tsquery('english', 'hyperscale | colocation | (data <-> center)') AS q
WHERE to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, '')) @@ q
ORDER BY
    ts_rank(
        to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, '')),
        q
    ) DESC,
    lb.bill_id  -- tie-breaker so the LIMIT cut-off is deterministic
LIMIT 50;


-- ── Session coverage check ──────────────────────────────────
-- Per-state summary of loaded sessions, for spotting gaps in ingestion.
SELECT
    state_abbr,
    COUNT(*)        AS sessions_loaded,
    SUM(bill_count) AS total_bills,
    MIN(year_start) AS earliest,
    MAX(year_end)   AS latest
FROM legiscan_sessions
GROUP BY state_abbr
ORDER BY state_abbr;