first commit

This commit is contained in:
2026-05-15 20:48:41 -07:00
commit f57969c9ee
34 changed files with 89262 additions and 0 deletions

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env python3
import argparse
import os
import subprocess
from pathlib import Path
import psycopg2
DB_NAME = "data_centers"
TRACT_TABLE = "public.data_center_census_tracts_2024"
STAGE_TABLE = "public._watershed_huc8_stage"
HUC8_TABLE = "public.watershed_huc8"
LINK_TABLE = "public.census_tract_huc8_link"
def connect():
return psycopg2.connect(
host=os.environ["PGWEB_HOST"],
port=os.environ["PGWEB_PORT"],
user=os.environ["PGWEB_USER"],
password=os.environ["PGWEB_PASSWORD"],
dbname=DB_NAME,
)
def import_huc8_shapefile(shapefile_path):
conn_str = (
f"PG:host={os.environ['PGWEB_HOST']} "
f"port={os.environ['PGWEB_PORT']} "
f"user={os.environ['PGWEB_USER']} "
f"password={os.environ['PGWEB_PASSWORD']} "
f"dbname={DB_NAME}"
)
source = str(shapefile_path.resolve())
cmd = [
"ogr2ogr",
"-f",
"PostgreSQL",
conn_str,
source,
"-nln",
STAGE_TABLE,
"-nlt",
"MULTIPOLYGON",
"-t_srs",
"EPSG:4326",
"-lco",
"GEOMETRY_NAME=geom",
"-lco",
"FID=gid",
"-lco",
"PRECISION=NO",
"-unsetFieldWidth",
"-skipfailures",
"-overwrite",
]
subprocess.run(cmd, check=True)
def build_final_tables(conn):
with conn:
with conn.cursor() as cur:
cur.execute(f"drop table if exists {HUC8_TABLE}")
cur.execute(
f"""
create table {HUC8_TABLE} as
select distinct on (huc8)
huc8,
name,
states,
areaacres,
areasqkm,
loaddate,
sourceorig as sourceoriginator,
sourcedata as sourcedatadesc,
sourcefeat as sourcefeatureid,
metasource as metasourceid,
tnmid,
geom::geometry(MultiPolygon, 4326) as geom
from {STAGE_TABLE}
where huc8 is not null
order by huc8, loaddate desc nulls last
"""
)
cur.execute(f"alter table {HUC8_TABLE} add primary key (huc8)")
cur.execute(
f"create index watershed_huc8_geom_gix on {HUC8_TABLE} using gist (geom)"
)
cur.execute(
f"create index watershed_huc8_states_idx on {HUC8_TABLE} (states)"
)
cur.execute(f"drop table if exists {LINK_TABLE}")
cur.execute(
f"""
create table {LINK_TABLE} as
select
geoid,
huc8,
overlap_sqm,
overlap_sqm / 1000000.0 as overlap_sqkm,
overlap_sqm / nullif(tract_sqm, 0.0) as tract_overlap_pct
from (
select
tr.geoid,
wh.huc8,
st_area(
st_intersection(
tr.geom::geography,
wh.geom::geography
)
) as overlap_sqm,
st_area(tr.geom::geography) as tract_sqm
from {TRACT_TABLE} tr
join {HUC8_TABLE} wh
on st_intersects(tr.geom, wh.geom)
) as overlap_rows
where overlap_sqm > 0
"""
)
cur.execute(
f"create index census_tract_huc8_link_geoid_idx on {LINK_TABLE} (geoid)"
)
cur.execute(
f"create index census_tract_huc8_link_huc8_idx on {LINK_TABLE} (huc8)"
)
cur.execute(f"analyze {STAGE_TABLE}")
cur.execute(f"analyze {HUC8_TABLE}")
cur.execute(f"analyze {LINK_TABLE}")
def parse_args():
parser = argparse.ArgumentParser(
description=(
"Build watershed HUC8 boundaries and GEOID linkage tables from "
"a local HUC8 shapefile."
)
)
parser.add_argument(
"--shapefile",
default="HUC8_CONUS/HUC8_US.shp",
help="Path to the HUC8 shapefile to import.",
)
parser.add_argument(
"--build-only",
action="store_true",
help="Skip imports and rebuild final/link tables from existing stage data.",
)
return parser.parse_args()
def main():
args = parse_args()
shapefile_path = Path(args.shapefile)
if not args.build_only and not shapefile_path.exists():
raise FileNotFoundError(f"shapefile not found: {shapefile_path}")
if not args.build_only:
print(f"importing HUC8 shapefile from {shapefile_path}")
import_huc8_shapefile(shapefile_path)
conn = connect()
try:
build_final_tables(conn)
with conn.cursor() as cur:
cur.execute(f"select count(*) from {HUC8_TABLE}")
huc8_rows = cur.fetchone()[0]
cur.execute(f"select count(*) from {LINK_TABLE}")
link_rows = cur.fetchone()[0]
finally:
conn.close()
print(
f"done: source={shapefile_path}, huc8_rows={huc8_rows}, "
f"geoid_huc8_links={link_rows}"
)
if __name__ == "__main__":
main()