#!/usr/bin/env python3 import argparse import os import subprocess from pathlib import Path import psycopg2 DB_NAME = "data_centers" TRACT_TABLE = "public.data_center_census_tracts_2024" STAGE_TABLE = "public._watershed_huc8_stage" HUC8_TABLE = "public.watershed_huc8" LINK_TABLE = "public.census_tract_huc8_link" def connect(): return psycopg2.connect( host=os.environ["PGWEB_HOST"], port=os.environ["PGWEB_PORT"], user=os.environ["PGWEB_USER"], password=os.environ["PGWEB_PASSWORD"], dbname=DB_NAME, ) def import_huc8_shapefile(shapefile_path): conn_str = ( f"PG:host={os.environ['PGWEB_HOST']} " f"port={os.environ['PGWEB_PORT']} " f"user={os.environ['PGWEB_USER']} " f"password={os.environ['PGWEB_PASSWORD']} " f"dbname={DB_NAME}" ) source = str(shapefile_path.resolve()) cmd = [ "ogr2ogr", "-f", "PostgreSQL", conn_str, source, "-nln", STAGE_TABLE, "-nlt", "MULTIPOLYGON", "-t_srs", "EPSG:4326", "-lco", "GEOMETRY_NAME=geom", "-lco", "FID=gid", "-lco", "PRECISION=NO", "-unsetFieldWidth", "-skipfailures", "-overwrite", ] subprocess.run(cmd, check=True) def build_final_tables(conn): with conn: with conn.cursor() as cur: cur.execute(f"drop table if exists {HUC8_TABLE}") cur.execute( f""" create table {HUC8_TABLE} as select distinct on (huc8) huc8, name, states, areaacres, areasqkm, loaddate, sourceorig as sourceoriginator, sourcedata as sourcedatadesc, sourcefeat as sourcefeatureid, metasource as metasourceid, tnmid, geom::geometry(MultiPolygon, 4326) as geom from {STAGE_TABLE} where huc8 is not null order by huc8, loaddate desc nulls last """ ) cur.execute(f"alter table {HUC8_TABLE} add primary key (huc8)") cur.execute( f"create index watershed_huc8_geom_gix on {HUC8_TABLE} using gist (geom)" ) cur.execute( f"create index watershed_huc8_states_idx on {HUC8_TABLE} (states)" ) cur.execute(f"drop table if exists {LINK_TABLE}") cur.execute( f""" create table {LINK_TABLE} as select geoid, huc8, overlap_sqm, overlap_sqm / 1000000.0 as overlap_sqkm, overlap_sqm / nullif(tract_sqm, 0.0) as tract_overlap_pct from ( select tr.geoid, wh.huc8, st_area( st_intersection( tr.geom::geography, wh.geom::geography ) ) as overlap_sqm, st_area(tr.geom::geography) as tract_sqm from {TRACT_TABLE} tr join {HUC8_TABLE} wh on st_intersects(tr.geom, wh.geom) ) as overlap_rows where overlap_sqm > 0 """ ) cur.execute( f"create index census_tract_huc8_link_geoid_idx on {LINK_TABLE} (geoid)" ) cur.execute( f"create index census_tract_huc8_link_huc8_idx on {LINK_TABLE} (huc8)" ) cur.execute(f"analyze {STAGE_TABLE}") cur.execute(f"analyze {HUC8_TABLE}") cur.execute(f"analyze {LINK_TABLE}") def parse_args(): parser = argparse.ArgumentParser( description=( "Build watershed HUC8 boundaries and GEOID linkage tables from " "a local HUC8 shapefile." ) ) parser.add_argument( "--shapefile", default="HUC8_CONUS/HUC8_US.shp", help="Path to the HUC8 shapefile to import.", ) parser.add_argument( "--build-only", action="store_true", help="Skip imports and rebuild final/link tables from existing stage data.", ) return parser.parse_args() def main(): args = parse_args() shapefile_path = Path(args.shapefile) if not args.build_only and not shapefile_path.exists(): raise FileNotFoundError(f"shapefile not found: {shapefile_path}") if not args.build_only: print(f"importing HUC8 shapefile from {shapefile_path}") import_huc8_shapefile(shapefile_path) conn = connect() try: build_final_tables(conn) with conn.cursor() as cur: cur.execute(f"select count(*) from {HUC8_TABLE}") huc8_rows = cur.fetchone()[0] cur.execute(f"select count(*) from {LINK_TABLE}") link_rows = cur.fetchone()[0] finally: conn.close() print( f"done: source={shapefile_path}, huc8_rows={huc8_rows}, " f"geoid_huc8_links={link_rows}" ) if __name__ == "__main__": main()