Reorganize project into scripts/, docs/, data/, output/ directories
Move all Python scripts to scripts/, documentation to docs/, raw input data to data/, and generated HTML/CSV outputs to output/. Update path references in 8 scripts to use Path(__file__).parent.parent as project root so they work correctly from the new location. Update README links and quick-start commands accordingly. Notebooks remain at root. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
217
docs/query_legiscan_bills.sql
Normal file
217
docs/query_legiscan_bills.sql
Normal file
@@ -0,0 +1,217 @@
|
||||
-- ============================================================
|
||||
-- LegiScan Legislative Analysis Queries
|
||||
-- Database: data_centers Schema: public
|
||||
-- ============================================================
|
||||
--
|
||||
-- SETUP
|
||||
-- Populate the database first:
|
||||
--   python scripts/ingest_legiscan.py --all      (run from the project root)
|
||||
-- This downloads ~646 sessions (2016-2026, all US states + federal),
|
||||
-- loads ~1.3M bills, and tags ~60K as relevant.
|
||||
--
|
||||
-- To refresh (weekly dataset updates from LegiScan):
|
||||
--   python scripts/ingest_legiscan.py --fetch --load      (run from the project root)
|
||||
-- Already-imported sessions with unchanged dataset_hash are skipped.
|
||||
--
|
||||
-- To retag after editing keyword lists in ingest_legiscan.py:
|
||||
--   python scripts/ingest_legiscan.py --tag      (run from the project root)
|
||||
--
|
||||
-- RELEVANCE TAGS (stored in legiscan_bills.relevance_tags[]):
|
||||
-- data_center - Bills naming data centers, hyperscale, colocation, AI campuses
|
||||
-- large_load - Crypto mining, large industrial loads, extraordinary load
|
||||
-- ratepayer_protection- Cost shifting, cross-subsidy, rate design, affordability
|
||||
-- grid_impact - Grid reliability, transmission, interconnection queue
|
||||
-- tax_incentive - Tax exemptions/abatements/credits for facilities
|
||||
-- energy_policy - Renewable PPAs, green tariffs, clean electricity
|
||||
-- water_use - Cooling water, evaporative cooling, water footprint
|
||||
-- siting_permitting - Zoning, conditional use permits, local control
|
||||
--
|
||||
-- STATUS CODES (legiscan_bills.status):
|
||||
-- 1=Introduced 2=Engrossed 3=Enrolled 4=Passed 5=Vetoed
|
||||
-- 6=Failed 7=Override 8=Chaptered 9=Referred 12=Draft
|
||||
-- ============================================================
|
||||
|
||||
-- ── Quick overview ──────────────────────────────────────────
-- Single-row summary of everything loaded: total bills, how many are flagged
-- relevant, how many distinct states appear, and the span of session years.
-- Bills are inner-joined to their session, so only bills with a matching
-- legiscan_sessions row are counted.

SELECT
    COUNT(*)                               AS total_bills,
    COUNT(*) FILTER (WHERE b.is_relevant)  AS relevant_bills,
    COUNT(DISTINCT b.state)                AS states,
    MIN(s.year_start)                      AS year_from,
    MAX(s.year_end)                        AS year_to
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id;
|
||||
|
||||
-- ── Bills per relevance tag ─────────────────────────────────
-- Expands relevance_tags[] to one row per (bill, tag) and counts per tag.
-- A bill carrying several tags is counted once under each of them, so the
-- bill_count column does not sum to the number of tagged bills.
--   passed  = status 4 (Passed)
--   enacted = status 4 (Passed) or 8 (Chaptered)
-- The unnest is written as an explicit CROSS JOIN LATERAL (rather than a
-- comma join) so the dependency on lb.relevance_tags is visible; bills whose
-- tag array is NULL or empty contribute no rows.

SELECT
    t.tag,
    COUNT(*)                                     AS bill_count,
    COUNT(*) FILTER (WHERE lb.status = 4)        AS passed,
    COUNT(*) FILTER (WHERE lb.status IN (4, 8))  AS enacted
FROM legiscan_bills AS lb
CROSS JOIN LATERAL unnest(lb.relevance_tags) AS t(tag)
GROUP BY t.tag
ORDER BY
    bill_count DESC,
    t.tag;          -- tiebreaker keeps the output order stable between runs
|
||||
|
||||
-- ── Top states for relevant legislation ────────────────────
-- The 20 states with the most relevant bills, with a per-tag breakdown for
-- five of the relevance tags. The tag columns can overlap: a bill with
-- several tags is counted in each matching column.
-- The secondary sort on state makes the LIMIT 20 cutoff deterministic when
-- states are tied on relevant_bills.

SELECT
    lb.state,
    COUNT(*)                                                                  AS relevant_bills,
    COUNT(*) FILTER (WHERE 'data_center'          = ANY(lb.relevance_tags))   AS data_center,
    COUNT(*) FILTER (WHERE 'large_load'           = ANY(lb.relevance_tags))   AS large_load,
    COUNT(*) FILTER (WHERE 'ratepayer_protection' = ANY(lb.relevance_tags))   AS ratepayer,
    COUNT(*) FILTER (WHERE 'tax_incentive'        = ANY(lb.relevance_tags))   AS tax_incentive,
    COUNT(*) FILTER (WHERE 'grid_impact'          = ANY(lb.relevance_tags))   AS grid_impact
FROM legiscan_bills AS lb
WHERE lb.is_relevant
GROUP BY lb.state
ORDER BY
    relevant_bills DESC,
    lb.state
LIMIT 20;
|
||||
|
||||
-- ── Trend by year ───────────────────────────────────────────
-- Per session start year: all bills, relevant bills, relevant bills with
-- status 4 (Passed) or 8 (Chaptered), and the relevant share as a percentage
-- rounded to one decimal place.

WITH yearly AS (
    -- Raw counts per session start year.
    SELECT
        s.year_start                                                          AS year,
        COUNT(b.bill_id)                                                      AS total_bills,
        COUNT(b.bill_id) FILTER (WHERE b.is_relevant)                         AS relevant_bills,
        COUNT(b.bill_id) FILTER (WHERE b.is_relevant AND b.status IN (4, 8))  AS enacted
    FROM legiscan_bills AS b
    INNER JOIN legiscan_sessions AS s
        ON s.session_id = b.session_id
    GROUP BY s.year_start
)
SELECT
    year,
    total_bills,
    relevant_bills,
    enacted,
    -- NULLIF guards the division when a year has no countable bills.
    ROUND(100.0 * relevant_bills / NULLIF(total_bills, 0), 1) AS pct_relevant
FROM yearly
ORDER BY year;
|
||||
|
||||
-- ── Data center bills specifically ─────────────────────────
-- Every bill tagged 'data_center', with its session start year.
-- Sort order: status 4 (Passed) first, then 8 (Chaptered), then 3 (Enrolled),
-- then everything else; within each group newest year first, then by state.

SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'data_center' = ANY(b.relevance_tags)
ORDER BY
    CASE
        WHEN b.status = 4 THEN 0
        WHEN b.status = 8 THEN 1
        WHEN b.status = 3 THEN 2
        ELSE 3
    END,
    s.year_start DESC,
    b.state;
|
||||
|
||||
-- ── Ratepayer protection bills ──────────────────────────────
-- Every bill tagged 'ratepayer_protection', with its session start year.
-- Sort order: status 4 (Passed) first, then 8 (Chaptered), then 3 (Enrolled),
-- then everything else; within each group newest year first, then by state.

SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'ratepayer_protection' = ANY(b.relevance_tags)
ORDER BY
    CASE
        WHEN b.status = 4 THEN 0
        WHEN b.status = 8 THEN 1
        WHEN b.status = 3 THEN 2
        ELSE 3
    END,
    s.year_start DESC,
    b.state;
|
||||
|
||||
-- ── Bills at intersection of data center + ratepayer ───────
-- Bills that carry BOTH the 'data_center' and 'ratepayer_protection' tags,
-- newest session year first, then by state.

SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'data_center'          = ANY(b.relevance_tags)
  AND 'ratepayer_protection' = ANY(b.relevance_tags)
ORDER BY
    s.year_start DESC,
    b.state;
|
||||
|
||||
-- ── Large load + grid impact ────────────────────────────────
-- Bills that carry BOTH the 'large_load' and 'grid_impact' tags,
-- newest session year first, then by state.

SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'large_load'  = ANY(b.relevance_tags)
  AND 'grid_impact' = ANY(b.relevance_tags)
ORDER BY
    s.year_start DESC,
    b.state;
|
||||
|
||||
-- ── Tax incentive bills passed/enacted ─────────────────────
-- Bills tagged 'tax_incentive' whose status is 4 (Passed) or 8 (Chaptered),
-- newest session year first, then by state.

SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'tax_incentive' = ANY(b.relevance_tags)
  AND b.status IN (4, 8)   -- Passed or Chaptered
ORDER BY
    s.year_start DESC,
    b.state;
|
||||
|
||||
-- ── Join to data centers: states with both DCs and active legislation ──
-- One row per state present in master_data_centers, with the number of
-- facilities and counts of relevant bills for that state. States that have
-- data centers but no relevant bills are kept (LEFT JOIN) and show zeros.
--
-- Each side is aggregated per state BEFORE joining. Joining raw facilities to
-- raw bills on state would produce (facilities x bills) rows per state and
-- rely on COUNT(DISTINCT ...) to undo the duplication; pre-aggregating gives
-- the same numbers from a 1:1 join.

WITH dc_by_state AS (
    -- Facilities per state.
    SELECT
        state,
        COUNT(DISTINCT id) AS data_centers
    FROM master_data_centers
    GROUP BY state
),
bills_by_state AS (
    -- Relevant bills per state, with tag/status breakdowns.
    SELECT
        state,
        COUNT(DISTINCT bill_id)                                                        AS relevant_bills,
        COUNT(DISTINCT bill_id) FILTER (WHERE 'ratepayer_protection' = ANY(relevance_tags)) AS ratepayer_bills,
        COUNT(DISTINCT bill_id) FILTER (WHERE 'data_center' = ANY(relevance_tags))     AS dc_specific_bills,
        COUNT(DISTINCT bill_id) FILTER (WHERE status IN (4, 8))                        AS enacted_bills
    FROM legiscan_bills
    WHERE is_relevant
    GROUP BY state
)
SELECT
    dc.state,
    dc.data_centers,
    COALESCE(b.relevant_bills, 0)     AS relevant_bills,
    COALESCE(b.ratepayer_bills, 0)    AS ratepayer_bills,
    COALESCE(b.dc_specific_bills, 0)  AS dc_specific_bills,
    COALESCE(b.enacted_bills, 0)      AS enacted_bills
FROM dc_by_state AS dc
LEFT JOIN bills_by_state AS b
    ON b.state = dc.state
ORDER BY
    relevant_bills DESC,
    dc.state;       -- tiebreaker keeps the output order stable between runs
|
||||
|
||||
-- ── Full-text search: find bills mentioning specific terms ──
-- Searches title + description (NULLs treated as empty) and returns the 50
-- best-ranked matches.
--
-- To search for something else, edit the tsquery string in the "q" subquery.
-- to_tsquery syntax: every token must be joined by an operator:
--   |    OR
--   &    AND
--   !    NOT
--   <->  FOLLOWED BY (use this for phrases, e.g. data <-> center)
-- Double-quoted phrases such as "data center" are NOT valid to_tsquery input
-- and raise "syntax error in tsquery". The <-> operator requires
-- PostgreSQL 9.6 or later.

SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.description,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
-- The search query is defined once here and reused for matching and ranking.
CROSS JOIN (
    SELECT to_tsquery('english', 'hyperscale | colocation | data <-> center') AS query
) AS q
WHERE to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, ''))
      @@ q.query
ORDER BY
    ts_rank(
        to_tsvector('english', COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, '')),
        q.query
    ) DESC,
    lb.bill_id      -- tiebreaker so the LIMIT cutoff is deterministic for equal ranks
LIMIT 50;
|
||||
|
||||
-- ── Session coverage check ──────────────────────────────────
-- Per state abbreviation: how many sessions are present in legiscan_sessions,
-- the sum of their bill_count values, and the earliest/latest session years.

SELECT
    s.state_abbr,
    COUNT(*)           AS sessions_loaded,
    SUM(s.bill_count)  AS total_bills,
    MIN(s.year_start)  AS earliest,
    MAX(s.year_end)    AS latest
FROM legiscan_sessions AS s
GROUP BY s.state_abbr
ORDER BY s.state_abbr ASC;
|
||||
Reference in New Issue
Block a user