Reorganize project into scripts/, docs/, data/, output/ directories

Move all Python scripts to scripts/, documentation to docs/, raw input
data to data/, and generated HTML/CSV outputs to output/. Update path
references in 8 scripts to use Path(__file__).parent.parent as project
root so they work correctly from the new location. Update README links
and quick-start commands accordingly. Notebooks remain at root.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 21:57:22 -07:00
parent a2e295d95b
commit ee5856661a
40 changed files with 31 additions and 30 deletions

View File

@@ -0,0 +1,217 @@
-- ============================================================
-- LegiScan Legislative Analysis Queries
-- Database: data_centers Schema: public
-- ============================================================
--
-- SETUP
-- Populate the database first:
-- python ingest_legiscan.py --all
-- This downloads ~646 sessions (2016-2026, all US states + federal),
-- loads ~1.3M bills, and tags ~60K as relevant.
--
-- To refresh (weekly dataset updates from LegiScan):
-- python ingest_legiscan.py --fetch --load
-- Already-imported sessions with unchanged dataset_hash are skipped.
--
-- To retag after editing keyword lists in ingest_legiscan.py:
-- python ingest_legiscan.py --tag
--
-- RELEVANCE TAGS (stored in legiscan_bills.relevance_tags[]):
-- data_center - Bills naming data centers, hyperscale, colocation, AI campuses
-- large_load - Crypto mining, large industrial loads, extraordinary load
-- ratepayer_protection - Cost shifting, cross-subsidy, rate design, affordability
-- grid_impact - Grid reliability, transmission, interconnection queue
-- tax_incentive - Tax exemptions/abatements/credits for facilities
-- energy_policy - Renewable PPAs, green tariffs, clean electricity
-- water_use - Cooling water, evaporative cooling, water footprint
-- siting_permitting - Zoning, conditional use permits, local control
--
-- STATUS CODES (legiscan_bills.status):
-- 1=Introduced 2=Engrossed 3=Enrolled 4=Passed 5=Vetoed
-- 6=Failed 7=Override 8=Chaptered 9=Referred 12=Draft
-- ============================================================
-- ── Quick overview ──────────────────────────────────────────
-- Single-row summary of the loaded data: total bills, bills flagged
-- relevant, number of distinct states, and the span of session years.
-- Inner join: a bill without a matching legiscan_sessions row is not counted.
SELECT
    COUNT(*)                                 AS total_bills,
    COUNT(*) FILTER (WHERE b.is_relevant)    AS relevant_bills,
    COUNT(DISTINCT b.state)                  AS states,
    MIN(s.year_start)                        AS year_from,
    MAX(s.year_end)                          AS year_to
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id;
-- ── Bills per relevance tag ─────────────────────────────────
-- Expands each bill's relevance_tags array into one row per tag, then
-- counts bills per tag. A bill with several tags is counted once under
-- each of them. "passed" is status 4 only; "enacted" is status 4 or 8.
SELECT
    t.tag,
    COUNT(*)                                        AS bill_count,
    COUNT(*) FILTER (WHERE b.status = 4)            AS passed,
    COUNT(*) FILTER (WHERE b.status IN (4, 8))      AS enacted
FROM legiscan_bills AS b
CROSS JOIN LATERAL unnest(b.relevance_tags) AS t (tag)
GROUP BY t.tag
ORDER BY bill_count DESC;
-- ── Top states for relevant legislation ────────────────────
-- The 20 states with the most relevant bills, with a per-tag breakdown.
-- A bill carrying several tags is counted in every matching tag column,
-- so the tag columns can sum to more than relevant_bills.
SELECT
    state,
    COUNT(*)                                                                AS relevant_bills,
    COUNT(*) FILTER (WHERE 'data_center' = ANY(relevance_tags))             AS data_center,
    COUNT(*) FILTER (WHERE 'large_load' = ANY(relevance_tags))              AS large_load,
    COUNT(*) FILTER (WHERE 'ratepayer_protection' = ANY(relevance_tags))    AS ratepayer,
    COUNT(*) FILTER (WHERE 'tax_incentive' = ANY(relevance_tags))           AS tax_incentive,
    COUNT(*) FILTER (WHERE 'grid_impact' = ANY(relevance_tags))             AS grid_impact
FROM legiscan_bills
WHERE is_relevant
GROUP BY state
ORDER BY
    relevant_bills DESC,
    state              -- tiebreaker: keeps the LIMIT 20 cut-off stable when counts tie
LIMIT 20;
-- ── Trend by year ───────────────────────────────────────────
-- Per session start year: all bills, relevant bills, relevant bills that
-- reached status 4 or 8, and the relevant share as a percentage (1 decimal).
WITH yearly AS (
    -- Raw counts per session start year.
    SELECT
        s.year_start                                                            AS year,
        COUNT(b.bill_id)                                                        AS total_bills,
        COUNT(b.bill_id) FILTER (WHERE b.is_relevant)                           AS relevant_bills,
        COUNT(b.bill_id) FILTER (WHERE b.is_relevant AND b.status IN (4, 8))    AS enacted
    FROM legiscan_bills AS b
    INNER JOIN legiscan_sessions AS s
        ON s.session_id = b.session_id
    GROUP BY s.year_start
)
SELECT
    y.year,
    y.total_bills,
    y.relevant_bills,
    y.enacted,
    -- NULLIF guards the division: a zero denominator yields NULL, not an error.
    ROUND(100.0 * y.relevant_bills / NULLIF(y.total_bills, 0), 1) AS pct_relevant
FROM yearly AS y
ORDER BY y.year;
-- ── Data center bills specifically ─────────────────────────
-- Every bill tagged 'data_center'. Sort priority by status:
-- 4 first, then 8, then 3, then everything else; within each
-- group newest session year first, then by state.
SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'data_center' = ANY(b.relevance_tags)
ORDER BY
    CASE
        WHEN b.status = 4 THEN 0
        WHEN b.status = 8 THEN 1
        WHEN b.status = 3 THEN 2
        ELSE 3
    END,
    s.year_start DESC,
    b.state;
-- ── Ratepayer protection bills ──────────────────────────────
-- Every bill tagged 'ratepayer_protection'. Sort priority by status:
-- 4 first, then 8, then 3, then everything else; within each
-- group newest session year first, then by state.
SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE 'ratepayer_protection' = ANY(b.relevance_tags)
ORDER BY
    CASE
        WHEN b.status = 4 THEN 0
        WHEN b.status = 8 THEN 1
        WHEN b.status = 3 THEN 2
        ELSE 3
    END,
    s.year_start DESC,
    b.state;
-- ── Bills at intersection of data center + ratepayer ───────
-- Bills whose tag array contains BOTH 'data_center' and
-- 'ratepayer_protection', newest session year first, then by state.
SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE
    'data_center' = ANY(b.relevance_tags)
    AND 'ratepayer_protection' = ANY(b.relevance_tags)
ORDER BY
    s.year_start DESC,
    b.state;
-- ── Large load + grid impact ────────────────────────────────
-- Bills whose tag array contains BOTH 'large_load' and 'grid_impact',
-- newest session year first, then by state.
SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.relevance_tags,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE
    'large_load' = ANY(b.relevance_tags)
    AND 'grid_impact' = ANY(b.relevance_tags)
ORDER BY
    s.year_start DESC,
    b.state;
-- ── Tax incentive bills passed/enacted ─────────────────────
-- Bills tagged 'tax_incentive' whose status is 4 (Passed) or
-- 8 (Chaptered), newest session year first, then by state.
SELECT
    b.state,
    b.bill_number,
    s.year_start AS year,
    b.status,
    b.title,
    b.url
FROM legiscan_bills AS b
INNER JOIN legiscan_sessions AS s
    ON s.session_id = b.session_id
WHERE
    'tax_incentive' = ANY(b.relevance_tags)
    AND b.status IN (4, 8)  -- Passed or Chaptered
ORDER BY
    s.year_start DESC,
    b.state;
-- ── Join to data centers: states with both DCs and active legislation ──
-- One row per state present in master_data_centers: number of data
-- centers alongside counts of relevant bills for the same state.
-- The LEFT JOIN keeps states that have data centers but no relevant
-- bills; their bill columns are 0.
--
-- Each side is aggregated per state BEFORE joining. Joining the raw
-- tables on state alone pairs every data center with every relevant
-- bill in that state, which multiplies rows only for COUNT(DISTINCT)
-- to collapse them again.
WITH dc_by_state AS (
    -- Data centers per state.
    SELECT
        state,
        COUNT(DISTINCT id) AS data_centers
    FROM master_data_centers
    GROUP BY state
),
bills_by_state AS (
    -- Relevant bills per state, overall and by category.
    SELECT
        state,
        COUNT(DISTINCT bill_id) AS relevant_bills,
        COUNT(DISTINCT bill_id)
            FILTER (WHERE 'ratepayer_protection' = ANY(relevance_tags)) AS ratepayer_bills,
        COUNT(DISTINCT bill_id)
            FILTER (WHERE 'data_center' = ANY(relevance_tags)) AS dc_specific_bills,
        COUNT(DISTINCT bill_id)
            FILTER (WHERE status IN (4, 8)) AS enacted_bills
    FROM legiscan_bills
    WHERE is_relevant
    GROUP BY state
)
SELECT
    dc.state,
    dc.data_centers,
    COALESCE(bl.relevant_bills, 0)      AS relevant_bills,
    COALESCE(bl.ratepayer_bills, 0)     AS ratepayer_bills,
    COALESCE(bl.dc_specific_bills, 0)   AS dc_specific_bills,
    COALESCE(bl.enacted_bills, 0)       AS enacted_bills
FROM dc_by_state AS dc
LEFT JOIN bills_by_state AS bl
    ON bl.state = dc.state
ORDER BY
    relevant_bills DESC,
    dc.state;  -- tiebreaker for a stable row order
-- ── Full-text search: find bills mentioning specific terms ──
-- Returns the 50 best-ranked bills whose title or description matches
-- the search expression. To search for something else, edit the single
-- to_tsquery string in the "search" subquery below.
--
-- to_tsquery syntax:  |  = OR,  &  = AND,  <->  = immediately followed by.
-- to_tsquery does NOT accept double-quoted phrases: writing "data center"
-- raises "syntax error in tsquery". A phrase is spelled  data <-> center.
SELECT
    lb.state,
    lb.bill_number,
    ls.year_start AS year,
    lb.status,
    lb.title,
    lb.description,
    lb.url
FROM legiscan_bills AS lb
INNER JOIN legiscan_sessions AS ls
    ON ls.session_id = lb.session_id
CROSS JOIN (
    -- The search expression, written once and reused for match and rank.
    SELECT to_tsquery('english', 'hyperscale | colocation | (data <-> center)') AS query
) AS search
CROSS JOIN LATERAL (
    -- Searchable text per bill; COALESCE keeps a NULL title or description
    -- from turning the whole concatenation into NULL.
    SELECT to_tsvector(
        'english',
        COALESCE(lb.title, '') || ' ' || COALESCE(lb.description, '')
    ) AS doc
) AS txt
WHERE txt.doc @@ search.query
ORDER BY
    ts_rank(txt.doc, search.query) DESC,
    lb.bill_id  -- tiebreaker: keeps the LIMIT 50 cut-off stable when ranks tie
LIMIT 50;
-- ── Session coverage check ──────────────────────────────────
-- Per state abbreviation: how many sessions are loaded, the sum of their
-- bill_count values, and the earliest start / latest end year covered.
SELECT
    s.state_abbr,
    COUNT(*)            AS sessions_loaded,
    SUM(s.bill_count)   AS total_bills,
    MIN(s.year_start)   AS earliest,
    MAX(s.year_end)     AS latest
FROM legiscan_sessions AS s
GROUP BY s.state_abbr
ORDER BY s.state_abbr;