185 lines
5.3 KiB
Python
185 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import os
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import psycopg2
|
|
|
|
|
|
DB_NAME = "data_centers"
|
|
TRACT_TABLE = "public.data_center_census_tracts_2024"
|
|
STAGE_TABLE = "public._watershed_huc8_stage"
|
|
HUC8_TABLE = "public.watershed_huc8"
|
|
LINK_TABLE = "public.census_tract_huc8_link"
|
|
|
|
|
|
def connect():
|
|
return psycopg2.connect(
|
|
host=os.environ["PGWEB_HOST"],
|
|
port=os.environ["PGWEB_PORT"],
|
|
user=os.environ["PGWEB_USER"],
|
|
password=os.environ["PGWEB_PASSWORD"],
|
|
dbname=DB_NAME,
|
|
)
|
|
|
|
|
|
def import_huc8_shapefile(shapefile_path):
|
|
conn_str = (
|
|
f"PG:host={os.environ['PGWEB_HOST']} "
|
|
f"port={os.environ['PGWEB_PORT']} "
|
|
f"user={os.environ['PGWEB_USER']} "
|
|
f"password={os.environ['PGWEB_PASSWORD']} "
|
|
f"dbname={DB_NAME}"
|
|
)
|
|
source = str(shapefile_path.resolve())
|
|
|
|
cmd = [
|
|
"ogr2ogr",
|
|
"-f",
|
|
"PostgreSQL",
|
|
conn_str,
|
|
source,
|
|
"-nln",
|
|
STAGE_TABLE,
|
|
"-nlt",
|
|
"MULTIPOLYGON",
|
|
"-t_srs",
|
|
"EPSG:4326",
|
|
"-lco",
|
|
"GEOMETRY_NAME=geom",
|
|
"-lco",
|
|
"FID=gid",
|
|
"-lco",
|
|
"PRECISION=NO",
|
|
"-unsetFieldWidth",
|
|
"-skipfailures",
|
|
"-overwrite",
|
|
]
|
|
|
|
subprocess.run(cmd, check=True)
|
|
|
|
|
|
def build_final_tables(conn):
|
|
with conn:
|
|
with conn.cursor() as cur:
|
|
cur.execute(f"drop table if exists {HUC8_TABLE}")
|
|
cur.execute(
|
|
f"""
|
|
create table {HUC8_TABLE} as
|
|
select distinct on (huc8)
|
|
huc8,
|
|
name,
|
|
states,
|
|
areaacres,
|
|
areasqkm,
|
|
loaddate,
|
|
sourceorig as sourceoriginator,
|
|
sourcedata as sourcedatadesc,
|
|
sourcefeat as sourcefeatureid,
|
|
metasource as metasourceid,
|
|
tnmid,
|
|
geom::geometry(MultiPolygon, 4326) as geom
|
|
from {STAGE_TABLE}
|
|
where huc8 is not null
|
|
order by huc8, loaddate desc nulls last
|
|
"""
|
|
)
|
|
cur.execute(f"alter table {HUC8_TABLE} add primary key (huc8)")
|
|
cur.execute(
|
|
f"create index watershed_huc8_geom_gix on {HUC8_TABLE} using gist (geom)"
|
|
)
|
|
cur.execute(
|
|
f"create index watershed_huc8_states_idx on {HUC8_TABLE} (states)"
|
|
)
|
|
|
|
cur.execute(f"drop table if exists {LINK_TABLE}")
|
|
cur.execute(
|
|
f"""
|
|
create table {LINK_TABLE} as
|
|
select
|
|
geoid,
|
|
huc8,
|
|
overlap_sqm,
|
|
overlap_sqm / 1000000.0 as overlap_sqkm,
|
|
overlap_sqm / nullif(tract_sqm, 0.0) as tract_overlap_pct
|
|
from (
|
|
select
|
|
tr.geoid,
|
|
wh.huc8,
|
|
st_area(
|
|
st_intersection(
|
|
tr.geom::geography,
|
|
wh.geom::geography
|
|
)
|
|
) as overlap_sqm,
|
|
st_area(tr.geom::geography) as tract_sqm
|
|
from {TRACT_TABLE} tr
|
|
join {HUC8_TABLE} wh
|
|
on st_intersects(tr.geom, wh.geom)
|
|
) as overlap_rows
|
|
where overlap_sqm > 0
|
|
"""
|
|
)
|
|
cur.execute(
|
|
f"create index census_tract_huc8_link_geoid_idx on {LINK_TABLE} (geoid)"
|
|
)
|
|
cur.execute(
|
|
f"create index census_tract_huc8_link_huc8_idx on {LINK_TABLE} (huc8)"
|
|
)
|
|
|
|
cur.execute(f"analyze {STAGE_TABLE}")
|
|
cur.execute(f"analyze {HUC8_TABLE}")
|
|
cur.execute(f"analyze {LINK_TABLE}")
|
|
|
|
|
|
def parse_args():
|
|
parser = argparse.ArgumentParser(
|
|
description=(
|
|
"Build watershed HUC8 boundaries and GEOID linkage tables from "
|
|
"a local HUC8 shapefile."
|
|
)
|
|
)
|
|
parser.add_argument(
|
|
"--shapefile",
|
|
default="HUC8_CONUS/HUC8_US.shp",
|
|
help="Path to the HUC8 shapefile to import.",
|
|
)
|
|
parser.add_argument(
|
|
"--build-only",
|
|
action="store_true",
|
|
help="Skip imports and rebuild final/link tables from existing stage data.",
|
|
)
|
|
return parser.parse_args()
|
|
|
|
|
|
def main():
|
|
args = parse_args()
|
|
shapefile_path = Path(args.shapefile)
|
|
if not args.build_only and not shapefile_path.exists():
|
|
raise FileNotFoundError(f"shapefile not found: {shapefile_path}")
|
|
|
|
if not args.build_only:
|
|
print(f"importing HUC8 shapefile from {shapefile_path}")
|
|
import_huc8_shapefile(shapefile_path)
|
|
|
|
conn = connect()
|
|
try:
|
|
build_final_tables(conn)
|
|
with conn.cursor() as cur:
|
|
cur.execute(f"select count(*) from {HUC8_TABLE}")
|
|
huc8_rows = cur.fetchone()[0]
|
|
cur.execute(f"select count(*) from {LINK_TABLE}")
|
|
link_rows = cur.fetchone()[0]
|
|
finally:
|
|
conn.close()
|
|
|
|
print(
|
|
f"done: source={shapefile_path}, huc8_rows={huc8_rows}, "
|
|
f"geoid_huc8_links={link_rows}"
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|