Add FCC broadband build workflow and refresh enhanced cluster map

This commit is contained in:
2026-05-22 12:51:36 -07:00
parent 4f3dbfc7f9
commit dc8755cde0
4 changed files with 144478 additions and 53430 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,397 @@
#!/usr/bin/env python3
"""Build data-center broadband connection tables.
Creates a per-data-center broadband connection table and, when FCC BDC API
credentials are available, stores the FCC BDC public download catalog.
Required DB env vars:
PGWEB_HOST, PGWEB_PORT, PGWEB_USER, PGWEB_PASSWORD
FCC API env vars:
FCC_USERNAME or FCC_BDC_USERNAME - FCC User Registration username/email
FCC_API_KEY or FCC_HASH_VALUE - BDC public API hash_value token
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
from datetime import date, datetime
from pathlib import Path
from typing import Any
import psycopg2
import requests
from psycopg2.extras import Json, execute_values
# Database name for the data-center tables (same value get_conn() connects to).
DB_NAME = "data_centers"
# Source tables read when the connection table is rebuilt.
MASTER_TABLE = "public.master_data_centers"
TRACT_TABLE = "public.data_center_census_tracts_2024"
# Tables created (if missing) and populated by this script.
AS_OF_TABLE = "public.fcc_bdc_api_as_of_dates"
FILES_TABLE = "public.fcc_bdc_availability_files"
CONNECTION_TABLE = "public.data_center_broadband_connection"
# Root URL of the FCC BDC public API; endpoint paths are appended to it.
FCC_BASE_URL = "https://broadbandmap.fcc.gov/api/public"
# Sent as the user-agent header on every FCC API request.
USER_AGENT = "data-center-fcc-bdc-loader/1.0"
def load_zsh_secrets() -> None:
    """Copy variables defined by ``~/.zsh_secrets`` into ``os.environ``.

    Does nothing when the file is absent.  Otherwise a login ``zsh`` sources
    the file (discarding its output) and dumps the resulting environment; each
    ``KEY=value`` line whose key is not already set in this process is copied
    over.  Values are never printed.

    Raises:
        subprocess.CalledProcessError: if the ``zsh`` command exits non-zero.
    """
    if not (Path.home() / ".zsh_secrets").exists():
        return

    dumped_env = subprocess.run(
        ["zsh", "-lc", "source ~/.zsh_secrets >/dev/null 2>&1; env"],
        check=True,
        capture_output=True,
        text=True,
    ).stdout

    for entry in dumped_env.splitlines():
        name, separator, value = entry.partition("=")
        # Ignore lines without "=" or with an empty name; variables that are
        # already set in this process take precedence.
        if separator and name:
            os.environ.setdefault(name, value)
def require_env(keys: list[str]) -> None:
    """Ensure every environment variable named in *keys* is set and non-empty.

    Raises:
        RuntimeError: listing all missing (unset or empty) variable names.
    """
    absent = []
    for name in keys:
        if not os.environ.get(name):
            absent.append(name)
    if not absent:
        return
    raise RuntimeError("Missing required env vars: " + ", ".join(absent))
def get_conn():
    """Open a psycopg2 connection to the ``DB_NAME`` database.

    Host, port, user and password are read from the ``PGWEB_*`` environment
    variables.

    Raises:
        KeyError: if one of the ``PGWEB_*`` variables is not set.
    """
    return psycopg2.connect(
        host=os.environ["PGWEB_HOST"],
        port=os.environ["PGWEB_PORT"],
        user=os.environ["PGWEB_USER"],
        password=os.environ["PGWEB_PASSWORD"],
        # Use the module constant rather than repeating the literal name.
        dbname=DB_NAME,
    )
def fcc_credentials() -> tuple[str | None, str | None]:
    """Return ``(username, hash_value)`` for the FCC BDC API from the environment.

    The username comes from ``FCC_USERNAME``, falling back to
    ``FCC_BDC_USERNAME``; the token comes from ``FCC_API_KEY``, falling back to
    ``FCC_HASH_VALUE``.  An unset or empty primary variable triggers the
    fallback.
    """

    def first_set(primary: str, fallback: str) -> str | None:
        # An empty primary value is falsy, so the fallback is consulted.
        return os.getenv(primary) or os.getenv(fallback)

    return (
        first_set("FCC_USERNAME", "FCC_BDC_USERNAME"),
        first_set("FCC_API_KEY", "FCC_HASH_VALUE"),
    )
def fcc_get(path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
    """GET ``FCC_BASE_URL + path`` and return the decoded JSON payload.

    The username and hash_value from ``fcc_credentials()`` are sent as request
    headers together with the user-agent and a JSON accept header.

    Args:
        path: endpoint path appended to ``FCC_BASE_URL``.
        params: optional query-string parameters.

    Raises:
        RuntimeError: if either credential is missing, or the payload reports
            ``status_code`` 401/403 or ``status == "fail"``.
        Exception: whatever ``requests`` raises for transport failures, or
            from ``raise_for_status()``.
    """
    username, hash_value = fcc_credentials()
    if not (username and hash_value):
        raise RuntimeError(
            "FCC BDC API requires FCC_USERNAME or FCC_BDC_USERNAME plus "
            "FCC_API_KEY or FCC_HASH_VALUE."
        )

    response = requests.get(
        f"{FCC_BASE_URL}{path}",
        headers={
            "username": username,
            "hash_value": hash_value,
            "user-agent": USER_AGENT,
            "accept": "application/json",
        },
        params=params if params else {},
        timeout=60,
    )
    response.raise_for_status()

    body = response.json()
    # A rejection can also be reported inside the JSON body, so inspect it
    # even though raise_for_status() did not raise.
    auth_rejected = str(body.get("status_code")) in {"401", "403"}
    if auth_rejected or body.get("status") == "fail":
        raise RuntimeError(f"FCC API error for {path}: {body}")
    return body
def parse_date(value: Any) -> date | None:
    """Coerce *value* to a plain ``datetime.date``.

    Args:
        value: ``None``/``""`` (treated as missing), a ``date``, a
            ``datetime``, or anything whose ``str()`` starts with
            ``YYYY-MM-DD``.

    Returns:
        The calendar date, or ``None`` for a missing value.

    Raises:
        ValueError: if the first ten characters are not a ``YYYY-MM-DD`` date.
    """
    if value in (None, ""):
        return None
    # datetime is a subclass of date, so it must be checked first; returning
    # it unchanged would leak a time component and make later comparisons with
    # plain dates (e.g. in max()) raise TypeError.
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    return datetime.strptime(str(value)[:10], "%Y-%m-%d").date()
def to_int(value: Any) -> int | None:
    """Parse *value* as an integer, ignoring comma thousands separators.

    Returns:
        The integer, or ``None`` when *value* is ``None``, ``""`` or its
        string form is not accepted by ``int()``.
    """
    if value in (None, ""):
        return None
    try:
        parsed = int(str(value).replace(",", ""))
    except (TypeError, ValueError):
        parsed = None
    return parsed
def create_tables(cur) -> None:
    """Create the PostGIS extension, the three output tables and their indexes.

    Every statement uses ``if not exists``, so objects that already exist are
    left untouched.

    Args:
        cur: open database cursor; the statements run in its current
            transaction and are not committed here.
    """
    cur.execute("create extension if not exists postgis")

    cur.execute(
        f"""
        create table if not exists {AS_OF_TABLE} (
            data_type text not null,
            as_of_date date not null,
            raw jsonb not null,
            fetched_at timestamptz not null default now(),
            primary key (data_type, as_of_date)
        )
        """
    )

    cur.execute(
        f"""
        create table if not exists {FILES_TABLE} (
            as_of_date date not null,
            file_id bigint not null,
            category text,
            subcategory text,
            technology_type text,
            technology_code text,
            technology_code_desc text,
            speed_tier text,
            state_fips text,
            state_name text,
            provider_id bigint,
            provider_name text,
            file_type text,
            file_name text,
            record_count bigint,
            raw jsonb not null,
            fetched_at timestamptz not null default now(),
            primary key (as_of_date, file_id)
        )
        """
    )

    # The foreign key targets MASTER_TABLE, the same table that
    # rebuild_connection_base() selects from, so the two cannot drift apart.
    cur.execute(
        f"""
        create table if not exists {CONNECTION_TABLE} (
            master_id text primary key references {MASTER_TABLE}(master_id) on delete cascade,
            source text,
            name text,
            operator text,
            city text,
            state text,
            country text,
            longitude double precision,
            latitude double precision,
            geom geometry(Point, 4326),
            census_tract_geoid text,
            census_broadband_subscription_pct numeric,
            fcc_bdc_status text not null,
            fcc_bdc_as_of_date date,
            fcc_bdc_geography_type text,
            fcc_bdc_geoid text,
            fcc_provider_count integer,
            fcc_fiber_provider_count integer,
            fcc_cable_provider_count integer,
            fcc_fixed_wireless_provider_count integer,
            fcc_max_advertised_download_mbps numeric,
            fcc_max_advertised_upload_mbps numeric,
            fcc_100_20_provider_count integer,
            fcc_summary_json jsonb,
            fetched_at timestamptz not null default now(),
            updated_at timestamptz not null default now()
        )
        """
    )

    # (index name, table, index definition) - created after all tables exist.
    index_specs = (
        ("fcc_bdc_availability_files_category_idx", FILES_TABLE, "(category, subcategory)"),
        ("fcc_bdc_availability_files_state_idx", FILES_TABLE, "(state_fips)"),
        ("fcc_bdc_availability_files_provider_idx", FILES_TABLE, "(provider_id)"),
        ("data_center_broadband_connection_geom_gix", CONNECTION_TABLE, "using gist (geom)"),
        ("data_center_broadband_connection_tract_idx", CONNECTION_TABLE, "(census_tract_geoid)"),
        ("data_center_broadband_connection_status_idx", CONNECTION_TABLE, "(fcc_bdc_status)"),
    )
    for index_name, table, definition in index_specs:
        cur.execute(f"create index if not exists {index_name} on {table} {definition}")
def rebuild_connection_base(cur, status: str) -> int:
    """Empty ``CONNECTION_TABLE`` and refill it from the master table.

    Each inserted row copies the identity and location columns from
    ``MASTER_TABLE``, takes the broadband subscription percentage from the
    left-joined ``TRACT_TABLE`` row with the same geoid, and stores *status* in
    ``fcc_bdc_status``.  The other FCC columns are not written by this insert.

    Args:
        cur: open database cursor.
        status: value stored in ``fcc_bdc_status`` for every row.

    Returns:
        The number of rows in ``CONNECTION_TABLE`` after the insert.
    """
    insert_sql = f"""
        insert into {CONNECTION_TABLE} (
            master_id, source, name, operator, city, state, country,
            longitude, latitude, geom,
            census_tract_geoid, census_broadband_subscription_pct,
            fcc_bdc_status
        )
        select
            dc.master_id, dc.source, dc.name, dc.operator, dc.city, dc.state, dc.country,
            dc.longitude, dc.latitude, dc.geom,
            dc.geoid as census_tract_geoid,
            tr.broadband_subscription_pct as census_broadband_subscription_pct,
            %s as fcc_bdc_status
        from {MASTER_TABLE} dc
        left join {TRACT_TABLE} tr on tr.geoid::text = dc.geoid::text
        """

    cur.execute(f"truncate {CONNECTION_TABLE}")
    cur.execute(insert_sql, (status,))

    cur.execute(f"select count(*) from {CONNECTION_TABLE}")
    count_row = cur.fetchone()
    return count_row[0]
def latest_availability_date(rows: list[dict[str, Any]]) -> date | None:
    """Return the newest ``as_of_date`` among the availability rows.

    A row counts as an availability row when its ``data_type``, lower-cased,
    is ``"availability"`` or ``"availability data"``.  Rows whose date parses
    to ``None`` are ignored.

    Returns:
        The latest date, or ``None`` when no availability row has a date.
    """
    availability_labels = {"availability", "availability data"}
    parsed_dates = []
    for entry in rows:
        if str(entry.get("data_type", "")).lower() not in availability_labels:
            continue
        parsed_dates.append(parse_date(entry.get("as_of_date")))

    known_dates = [candidate for candidate in parsed_dates if candidate is not None]
    if not known_dates:
        return None
    return max(known_dates)
def load_as_of_dates(cur) -> date:
    """Fetch the FCC as-of-date list, upsert it, and return the latest availability date.

    Rows from ``/map/listAsOfDates`` are upserted into ``AS_OF_TABLE`` keyed by
    ``(data_type, as_of_date)``.  Rows without a parseable ``as_of_date`` or
    without a ``data_type`` are skipped because both columns are NOT NULL
    primary-key columns.

    Args:
        cur: open database cursor.

    Returns:
        The newest availability ``as_of_date`` found in the response.

    Raises:
        RuntimeError: if the response contains no availability date, or from
            ``fcc_get`` on missing credentials / API failure.
    """
    payload = fcc_get("/map/listAsOfDates")
    rows = payload.get("data") or []

    # Key by primary key and keep the last occurrence.  PostgreSQL rejects an
    # ``insert ... on conflict do update`` statement that would touch the same
    # row twice, so a repeated key in the payload must not reach the insert.
    unique_rows: dict[tuple[Any, date], tuple[Any, ...]] = {}
    for row in rows:
        data_type = row.get("data_type")
        as_of_date = parse_date(row.get("as_of_date"))
        if data_type is None or not as_of_date:
            continue
        unique_rows[(data_type, as_of_date)] = (data_type, as_of_date, Json(row))

    values = list(unique_rows.values())
    if values:
        execute_values(
            cur,
            f"""
            insert into {AS_OF_TABLE} (data_type, as_of_date, raw)
            values %s
            on conflict (data_type, as_of_date) do update set
                raw = excluded.raw,
                fetched_at = now()
            """,
            values,
            page_size=1000,
        )

    latest = latest_availability_date(rows)
    if latest is None:
        raise RuntimeError(f"Could not find an availability as_of_date in FCC response: {rows}")
    return latest
def load_availability_file_catalog(cur, as_of_date: date) -> int:
    """Replace the stored FCC fixed-broadband file catalog for *as_of_date*.

    Requests ``listAvailabilityData`` for the date with
    ``technology_type="Fixed Broadband"``.  Rows whose ``file_id`` does not
    parse as an integer are dropped.  When at least one row remains, the
    existing ``FILES_TABLE`` rows for the date are deleted and the new rows are
    inserted; when none remain the table is left untouched.

    Args:
        cur: open database cursor.
        as_of_date: availability as-of date to request and store.

    Returns:
        The number of catalog rows inserted (0 when nothing was stored).
    """
    response = fcc_get(
        f"/map/downloads/listAvailabilityData/{as_of_date:%Y-%m-%d}",
        params={"technology_type": "Fixed Broadband"},
    )

    records = []
    for entry in response.get("data") or []:
        file_id = to_int(entry.get("file_id"))
        if file_id is None:
            continue
        field = entry.get
        records.append(
            (
                as_of_date,
                file_id,
                field("category"),
                field("subcategory"),
                field("technology_type"),
                field("technology_code"),
                field("technology_code_desc"),
                field("speed_tier"),
                field("state_fips"),
                field("state_name"),
                to_int(field("provider_id")),
                field("provider_name"),
                field("file_type"),
                field("file_name"),
                to_int(field("record_count")),
                Json(entry),
            )
        )

    if not records:
        return 0

    # Delete-then-insert so the catalog for this date mirrors the response.
    cur.execute(f"delete from {FILES_TABLE} where as_of_date = %s", (as_of_date,))
    execute_values(
        cur,
        f"""
        insert into {FILES_TABLE} (
            as_of_date, file_id, category, subcategory, technology_type,
            technology_code, technology_code_desc, speed_tier, state_fips,
            state_name, provider_id, provider_name, file_type, file_name,
            record_count, raw
        )
        values %s
        """,
        records,
        page_size=1000,
    )
    return len(records)
def main() -> int:
    """Run the broadband build workflow from the command line.

    Loads secrets, checks the ``PGWEB_*`` variables, creates the tables and
    rebuilds the base connection table.  Unless ``--skip-fcc`` is given or FCC
    credentials are missing, it then loads the FCC file catalog and stamps
    every connection row with the catalog's as-of date.

    Returns:
        0 on success or with ``--skip-fcc``; 2 when FCC credentials are
        missing (the rebuilt base table is still committed in that case).

    Raises:
        RuntimeError: from ``require_env`` when a ``PGWEB_*`` variable is
            missing, or from the FCC helpers on API failure.
        ValueError: from ``parse_date`` when ``--as-of-date`` is not
            ``YYYY-MM-DD``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--skip-fcc", action="store_true", help="Only create/rebuild the base connection table.")
    parser.add_argument("--as-of-date", help="FCC BDC availability as-of date, YYYY-MM-DD. Defaults to latest.")
    args = parser.parse_args()

    load_zsh_secrets()
    require_env(["PGWEB_HOST", "PGWEB_PORT", "PGWEB_USER", "PGWEB_PASSWORD"])

    username, hash_value = fcc_credentials()
    if args.skip_fcc:
        status = "fcc_skipped"
    elif hash_value and not username:
        status = "pending_fcc_username"
    else:
        status = "pending_fcc_catalog"

    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on an exception, but
        # psycopg2 does not close the connection there; ``finally`` does that.
        with conn, conn.cursor() as cur:
            create_tables(cur)
            n_connection = rebuild_connection_base(cur, status)
            print(f"{CONNECTION_TABLE}: {n_connection:,} base rows")

            if args.skip_fcc:
                conn.commit()
                return 0

            if not username or not hash_value:
                print(
                    "FCC catalog not loaded: set FCC_USERNAME or FCC_BDC_USERNAME "
                    "alongside FCC_API_KEY/FCC_HASH_VALUE in ~/.zsh_secrets.",
                    file=sys.stderr,
                )
                conn.commit()
                return 2

            if args.as_of_date:
                as_of_date = parse_date(args.as_of_date)
            else:
                as_of_date = load_as_of_dates(cur)
            n_files = load_availability_file_catalog(cur, as_of_date)

            # No WHERE clause: every connection row gets the same status/date.
            cur.execute(
                f"""
                update {CONNECTION_TABLE}
                set fcc_bdc_status = 'fcc_catalog_loaded',
                    fcc_bdc_as_of_date = %s,
                    updated_at = now()
                """,
                (as_of_date,),
            )
            conn.commit()

            if args.as_of_date:
                # AS_OF_TABLE is only written by load_as_of_dates(); do not
                # claim it was loaded when the date came from the CLI.
                print(f"{AS_OF_TABLE}: not refreshed; using requested availability date {as_of_date}")
            else:
                print(f"{AS_OF_TABLE}: loaded latest availability date {as_of_date}")
            print(f"{FILES_TABLE}: {n_files:,} fixed-broadband file catalog rows")
            return 0
    finally:
        conn.close()


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -31,6 +31,7 @@
"source": [
"import os\n",
"import json\n",
"import subprocess\n",
"from html import escape\n",
"from pathlib import Path\n",
"\n",
@@ -39,10 +40,12 @@
"\n",
"import pandas as pd\n",
"import folium\n",
"import psycopg2\n",
"from folium import plugins\n",
"\n",
"print('pandas:', pd.__version__)\n",
"print('folium:', folium.__version__)\n"
"print('folium:', folium.__version__)\n",
"print('psycopg2:', psycopg2.__version__)\n"
]
},
{
@@ -81,6 +84,17 @@
"SHOW_HUC8_LAYER = True\n",
"SHOW_STATE_ENERGY_LAYER = True\n",
"\n",
"# Existing DB-backed overlays.\n",
"ENABLE_DB_LAYER_LOAD = True\n",
"SHOW_INTERNET_CABLES_LAYER = True\n",
"SHOW_OPPOSITION_CASES_LAYER = True\n",
"SHOW_DROUGHT_AND_SMOKE_CONTEXT = True\n",
"\n",
"# New requested overlays.\n",
"SHOW_CLIMATE_LAYER = True\n",
"SHOW_BROADBAND_LAYER = True\n",
"SHOW_ELECTION_LAYER = True\n",
"\n",
"OUTPUT_DIR.mkdir(exist_ok=True)\n",
"print('points:', POINTS_CSV)\n",
"print('clusters:', CLUSTERS_CSV)\n",
@@ -159,9 +173,244 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"DB_NAME = 'data_centers'\n",
"DB_REQUIRED_ENV = ['PGWEB_HOST', 'PGWEB_PORT', 'PGWEB_USER', 'PGWEB_PASSWORD']\n",
"\n",
"internet_cables_geojson = None\n",
"opposition_cases = pd.DataFrame()\n",
"drought_context = pd.DataFrame()\n",
"smoke_context = pd.DataFrame()\n",
"climate_context = pd.DataFrame()\n",
"broadband_context = pd.DataFrame()\n",
"election_context = pd.DataFrame()\n",
"\n",
"\n",
"def load_zsh_secrets() -> None:\n",
" secrets = Path.home() / '.zsh_secrets'\n",
" if not secrets.exists():\n",
" return\n",
" result = subprocess.run(\n",
" ['zsh', '-lc', 'source ~/.zsh_secrets >/dev/null 2>&1; env'],\n",
" check=True,\n",
" capture_output=True,\n",
" text=True,\n",
" )\n",
" for line in result.stdout.splitlines():\n",
" if '=' not in line:\n",
" continue\n",
" key, value = line.split('=', 1)\n",
" if key and key not in os.environ:\n",
" os.environ[key] = value\n",
"\n",
"\n",
"def db_ready() -> bool:\n",
" return all(os.getenv(k) for k in DB_REQUIRED_ENV)\n",
"\n",
"\n",
"def get_conn():\n",
" return psycopg2.connect(\n",
" host=os.environ['PGWEB_HOST'],\n",
" port=os.environ['PGWEB_PORT'],\n",
" user=os.environ['PGWEB_USER'],\n",
" password=os.environ['PGWEB_PASSWORD'],\n",
" dbname=DB_NAME,\n",
" )\n",
"\n",
"\n",
"def load_optional_db_layers() -> None:\n",
" global internet_cables_geojson, opposition_cases, drought_context, smoke_context\n",
" global climate_context, broadband_context, election_context, points\n",
"\n",
" if not ENABLE_DB_LAYER_LOAD:\n",
" print('DB layer load disabled')\n",
" return\n",
"\n",
" load_zsh_secrets()\n",
" if not db_ready():\n",
" print('Skipping DB-backed layers: missing PGWEB_* environment variables')\n",
" return\n",
"\n",
" with get_conn() as conn:\n",
" if SHOW_INTERNET_CABLES_LAYER:\n",
" cable_sql = \"\"\"\n",
" select json_build_object(\n",
" 'type','FeatureCollection',\n",
" 'features', coalesce(json_agg(\n",
" json_build_object(\n",
" 'type','Feature',\n",
" 'geometry', ST_AsGeoJSON(geom)::json,\n",
" 'properties', json_build_object(\n",
" 'feature_id', feature_id,\n",
" 'name', name,\n",
" 'owners', owners,\n",
" 'rfs_year', rfs_year,\n",
" 'decommission_year', decommission_year,\n",
" 'length_km', length_km,\n",
" 'cable_type', cable_type\n",
" )\n",
" )\n",
" ), '[]'::json)\n",
" ) as fc\n",
" from public.internet_cables\n",
" where geom is not null\n",
" \"\"\"\n",
" internet_cables_geojson = pd.read_sql(cable_sql, conn).iloc[0]['fc']\n",
" n_cables = len(internet_cables_geojson.get('features', [])) if internet_cables_geojson else 0\n",
" print(f'internet_cables features: {n_cables:,}')\n",
"\n",
" if SHOW_OPPOSITION_CASES_LAYER:\n",
" opposition_sql = \"\"\"\n",
" select\n",
" id, location, state, lat, lon, investment_billion, status,\n",
" developer, commons_type, governance_response, outcome, opposition_type, data_source\n",
" from public.opposition_cases_geocoded\n",
" where lat is not null and lon is not null\n",
" \"\"\"\n",
" opposition_cases = pd.read_sql(opposition_sql, conn)\n",
" print(f'opposition_cases rows: {len(opposition_cases):,}')\n",
"\n",
" if SHOW_DROUGHT_AND_SMOKE_CONTEXT:\n",
" drought_sql = \"\"\"\n",
" select\n",
" master_id, usdm_status, worst_dm_category, mean_dm_category,\n",
" pct_weeks_in_d2_or_worse, pct_weeks_in_d3_or_worse,\n",
" longest_d2_streak_weeks, longest_d3_streak_weeks\n",
" from public.data_center_usdm_drought_exposure\n",
" \"\"\"\n",
" smoke_sql = \"\"\"\n",
" select\n",
" master_id, hms_status, smoke_period_start, smoke_period_end,\n",
" days_observed, days_with_any_smoke, days_with_heavy_smoke,\n",
" pct_days_with_any_smoke, pct_days_with_heavy_smoke,\n",
" worst_density, mean_density_rank\n",
" from public.data_center_hms_smoke_exposure\n",
" \"\"\"\n",
" drought_context = pd.read_sql(drought_sql, conn)\n",
" smoke_context = pd.read_sql(smoke_sql, conn)\n",
" print(f'drought_context rows: {len(drought_context):,}')\n",
" print(f'smoke_context rows: {len(smoke_context):,}')\n",
"\n",
" if not drought_context.empty:\n",
" cols = [c for c in drought_context.columns if c != 'master_id']\n",
" points = points.merge(drought_context[['master_id'] + cols], on='master_id', how='left')\n",
"\n",
" if not smoke_context.empty:\n",
" cols = [c for c in smoke_context.columns if c != 'master_id']\n",
" points = points.merge(smoke_context[['master_id'] + cols], on='master_id', how='left')\n",
"\n",
" if SHOW_CLIMATE_LAYER:\n",
" climate_sql = \"\"\"\n",
" select\n",
" master_id, mean_annual_temperature_c, mean_summer_temperature_c,\n",
" max_wet_bulb_temperature_c, extreme_heat_days,\n",
" annual_cooling_degree_days_c_mean, annual_precipitation_mm_mean\n",
" from public.data_center_historical_climate\n",
" \"\"\"\n",
" climate_context = pd.read_sql(climate_sql, conn)\n",
" print(f'climate_context rows: {len(climate_context):,}')\n",
" if not climate_context.empty:\n",
" cols = [c for c in climate_context.columns if c != 'master_id']\n",
" points = points.merge(climate_context[['master_id'] + cols], on='master_id', how='left')\n",
"\n",
" if SHOW_BROADBAND_LAYER:\n",
" broadband_sql = \"\"\"\n",
" select\n",
" master_id, census_broadband_subscription_pct,\n",
" fcc_bdc_status, fcc_bdc_as_of_date,\n",
" fcc_provider_count, fcc_fiber_provider_count, fcc_cable_provider_count,\n",
" fcc_fixed_wireless_provider_count,\n",
" fcc_max_advertised_download_mbps, fcc_max_advertised_upload_mbps,\n",
" fcc_100_20_provider_count\n",
" from public.data_center_broadband_connection\n",
" \"\"\"\n",
" broadband_context = pd.read_sql(broadband_sql, conn)\n",
" print(f'broadband_context rows: {len(broadband_context):,}')\n",
" if not broadband_context.empty:\n",
" cols = [c for c in broadband_context.columns if c != 'master_id']\n",
" points = points.merge(broadband_context[['master_id'] + cols], on='master_id', how='left')\n",
"\n",
" if SHOW_ELECTION_LAYER:\n",
" election_sql = \"\"\"\n",
" with best_match as (\n",
" select distinct on (m.master_id)\n",
" m.master_id,\n",
" m.state_code as election_state_code,\n",
" m.join_method as election_join_method,\n",
" m.match_distance_m as election_match_distance_m,\n",
" f.feature_id, f.layer_id, f.properties,\n",
" ST_Y(ST_PointOnSurface(f.geom)) as election_latitude,\n",
" ST_X(ST_PointOnSurface(f.geom)) as election_longitude\n",
" from public.data_center_rdh_precinct_vote_matches m\n",
" join public.rdh_precinct_vote_features f\n",
" on f.feature_id = m.feature_id and f.layer_id = m.layer_id\n",
" where f.geom is not null\n",
" order by m.master_id,\n",
" case m.join_method when 'point_in_precinct' then 0 else 1 end,\n",
" m.match_distance_m asc nulls last\n",
" )\n",
" select\n",
" master_id, election_state_code, election_join_method, election_match_distance_m,\n",
" feature_id, layer_id, election_latitude, election_longitude,\n",
" coalesce((properties->>'LOCALITY'), '') as election_locality,\n",
" coalesce((properties->>'PRECINCT'), '') as election_precinct,\n",
" nullif(properties->>'G20PREDBID','')::double precision as election_biden_votes,\n",
" nullif(properties->>'G20PRERTRU','')::double precision as election_trump_votes,\n",
" case\n",
" when (coalesce(nullif(properties->>'G20PREDBID','')::double precision,0)\n",
" + coalesce(nullif(properties->>'G20PRERTRU','')::double precision,0)) > 0\n",
" then 100.0 * coalesce(nullif(properties->>'G20PREDBID','')::double precision,0)\n",
" / (coalesce(nullif(properties->>'G20PREDBID','')::double precision,0)\n",
" + coalesce(nullif(properties->>'G20PRERTRU','')::double precision,0))\n",
" end as election_biden_share_pct,\n",
" case\n",
" when (coalesce(nullif(properties->>'G20PREDBID','')::double precision,0)\n",
" + coalesce(nullif(properties->>'G20PRERTRU','')::double precision,0)) > 0\n",
" then 100.0 * coalesce(nullif(properties->>'G20PRERTRU','')::double precision,0)\n",
" / (coalesce(nullif(properties->>'G20PREDBID','')::double precision,0)\n",
" + coalesce(nullif(properties->>'G20PRERTRU','')::double precision,0))\n",
" end as election_trump_share_pct\n",
" from best_match\n",
" \"\"\"\n",
" election_context = pd.read_sql(election_sql, conn)\n",
" if not election_context.empty:\n",
" election_context['election_trump_margin_pct'] = (\n",
" election_context['election_trump_share_pct'] - election_context['election_biden_share_pct']\n",
" )\n",
" print(f'election_context rows: {len(election_context):,}')\n",
" if not election_context.empty:\n",
" cols = [c for c in election_context.columns if c != 'master_id']\n",
" points = points.merge(election_context[['master_id'] + cols], on='master_id', how='left')\n",
"\n",
"\n",
"load_optional_db_layers()"
]
},
{
"cell_type": "markdown",
"id": "7",
"metadata": {},
"source": [
"## Optional DB-backed Layer Context\n",
"\n",
"This section pulls additional overlays directly from PostGIS:\n",
"- `public.internet_cables` (line layer)\n",
"- `public.opposition_cases_geocoded` (point layer)\n",
"- `public.data_center_usdm_drought_exposure` (point popup enrichment)\n",
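"- `public.data_center_hms_smoke_exposure` (point popup enrichment)\n",
"- `public.data_center_historical_climate` (point popup enrichment)\n",
"- `public.data_center_broadband_connection` (point popup enrichment)\n",
"- `public.data_center_rdh_precinct_vote_matches` joined to `public.rdh_precinct_vote_features` (point popup enrichment)\n",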
"\n",
"If DB credentials are unavailable, map generation still works with CSV/GeoJSON sources."
]
},
{
"cell_type": "markdown",
"id": "8",
"metadata": {},
"source": [
"## Map Helpers"
]
@@ -169,7 +418,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7",
"id": "9",
"metadata": {},
"outputs": [],
"source": [
@@ -180,6 +429,8 @@
"NOISE_COLOR = '#9ca3af'\n",
"CENTROID_COLOR = '#111827'\n",
"STATE_ENERGY_COLOR = '#f59e0b'\n",
"INTERNET_CABLE_COLOR = '#7c3aed'\n",
"OPPOSITION_CASE_COLOR = '#b91c1c'\n",
"\n",
"cluster_info = clusters.set_index('cluster_id').to_dict('index')\n",
"\n",
@@ -217,6 +468,50 @@
" return f'Cluster ID {cluster_id}', f'{point_count:,}', f'Rank {rank} of {n_clusters} by size'\n",
"\n",
"\n",
"def climate_color(mean_summer_c):\n",
" if pd.isna(mean_summer_c):\n",
" return '#94a3b8'\n",
" if mean_summer_c >= 32:\n",
" return '#7f1d1d'\n",
" if mean_summer_c >= 29:\n",
" return '#b91c1c'\n",
" if mean_summer_c >= 26:\n",
" return '#ea580c'\n",
" if mean_summer_c >= 23:\n",
" return '#f59e0b'\n",
" return '#0284c7'\n",
"\n",
"\n",
"def broadband_color(provider_count):\n",
" if pd.isna(provider_count):\n",
" return '#94a3b8'\n",
" p = float(provider_count)\n",
" if p >= 20:\n",
" return '#166534'\n",
" if p >= 10:\n",
" return '#16a34a'\n",
" if p >= 5:\n",
" return '#65a30d'\n",
" if p >= 2:\n",
" return '#ca8a04'\n",
" return '#b45309'\n",
"\n",
"\n",
"def election_color(margin_pct):\n",
" if pd.isna(margin_pct):\n",
" return '#94a3b8'\n",
" m = float(margin_pct)\n",
" if m >= 20:\n",
" return '#7f1d1d'\n",
" if m >= 5:\n",
" return '#dc2626'\n",
" if m <= -20:\n",
" return '#1e3a8a'\n",
" if m <= -5:\n",
" return '#2563eb'\n",
" return '#6b7280'\n",
"\n",
"\n",
"def point_popup(row):\n",
" cluster_label, cluster_size, cluster_rank = cluster_label_and_size(row.cluster_id)\n",
" nearest = row.nearest_neighbor_km\n",
@@ -263,6 +558,76 @@
" {seds_note}\n",
" '''\n",
"\n",
" drought_lines = ''\n",
" if hasattr(row, 'usdm_status') and pd.notna(row.usdm_status):\n",
" drought_lines = f'''\n",
" <hr style=\"margin: 6px 0;\">\n",
" <strong>Drought context (USDM)</strong><br>\n",
" Status: {clean_value(row.usdm_status)}<br>\n",
" Worst DM category: {fmt_number(row.worst_dm_category)}<br>\n",
" Mean DM category: {fmt_number(row.mean_dm_category, 2)}<br>\n",
" % weeks D2+: {fmt_number(row.pct_weeks_in_d2_or_worse, 1, suffix='%')}<br>\n",
" % weeks D3+: {fmt_number(row.pct_weeks_in_d3_or_worse, 1, suffix='%')}<br>\n",
" Longest D2 streak: {fmt_number(row.longest_d2_streak_weeks)} weeks<br>\n",
" Longest D3 streak: {fmt_number(row.longest_d3_streak_weeks)} weeks<br>\n",
" '''\n",
"\n",
" smoke_lines = ''\n",
" if hasattr(row, 'hms_status') and pd.notna(row.hms_status):\n",
" smoke_lines = f'''\n",
" <hr style=\"margin: 6px 0;\">\n",
" <strong>Wildfire smoke context (HMS)</strong><br>\n",
" Status: {clean_value(row.hms_status)}<br>\n",
" Observed days: {fmt_number(row.days_observed)}<br>\n",
" Any-smoke days: {fmt_number(row.days_with_any_smoke)} ({fmt_number(row.pct_days_with_any_smoke, 1, suffix='%')})<br>\n",
" Heavy-smoke days: {fmt_number(row.days_with_heavy_smoke)} ({fmt_number(row.pct_days_with_heavy_smoke, 1, suffix='%')})<br>\n",
" Worst density class: {clean_value(row.worst_density)}<br>\n",
" Mean density rank: {fmt_number(row.mean_density_rank, 2)}<br>\n",
" '''\n",
"\n",
" climate_lines = ''\n",
" if hasattr(row, 'mean_summer_temperature_c') and pd.notna(row.mean_summer_temperature_c):\n",
" climate_lines = f'''\n",
" <hr style=\"margin: 6px 0;\">\n",
" <strong>Climate context</strong><br>\n",
" Mean annual temp: {fmt_number(row.mean_annual_temperature_c, 1, suffix=' C')}<br>\n",
" Mean summer temp: {fmt_number(row.mean_summer_temperature_c, 1, suffix=' C')}<br>\n",
" Max wet-bulb temp: {fmt_number(row.max_wet_bulb_temperature_c, 1, suffix=' C')}<br>\n",
" Extreme heat days: {fmt_number(row.extreme_heat_days)}<br>\n",
" Annual CDD mean: {fmt_number(row.annual_cooling_degree_days_c_mean, 0)}<br>\n",
" Annual precip mean: {fmt_number(row.annual_precipitation_mm_mean, 0, suffix=' mm')}<br>\n",
" '''\n",
"\n",
" broadband_lines = ''\n",
" if hasattr(row, 'fcc_bdc_status') and pd.notna(row.fcc_bdc_status):\n",
" broadband_lines = f'''\n",
" <hr style=\"margin: 6px 0;\">\n",
" <strong>Broadband context</strong><br>\n",
" FCC BDC status: {clean_value(row.fcc_bdc_status)}<br>\n",
" FCC as-of date: {clean_value(row.fcc_bdc_as_of_date)}<br>\n",
" Census broadband subscription: {fmt_number(row.census_broadband_subscription_pct, 1, suffix='%')}<br>\n",
" Provider count: {fmt_number(row.fcc_provider_count)}<br>\n",
" Fiber providers: {fmt_number(row.fcc_fiber_provider_count)}<br>\n",
" Cable providers: {fmt_number(row.fcc_cable_provider_count)}<br>\n",
" Fixed wireless providers: {fmt_number(row.fcc_fixed_wireless_provider_count)}<br>\n",
" Max advertised down/up: {fmt_number(row.fcc_max_advertised_download_mbps, 0, suffix=' /')} {fmt_number(row.fcc_max_advertised_upload_mbps, 0, suffix=' Mbps')}<br>\n",
" Providers >=100/20: {fmt_number(row.fcc_100_20_provider_count)}<br>\n",
" '''\n",
"\n",
" election_lines = ''\n",
" if hasattr(row, 'election_biden_share_pct') and pd.notna(row.election_biden_share_pct):\n",
" election_lines = f'''\n",
" <hr style=\"margin: 6px 0;\">\n",
" <strong>Election context (2020 precinct)</strong><br>\n",
" State: {clean_value(row.election_state_code)}<br>\n",
" Locality: {clean_value(row.election_locality)}<br>\n",
" Precinct: {clean_value(row.election_precinct)}<br>\n",
" Biden share: {fmt_number(row.election_biden_share_pct, 1, suffix='%')}<br>\n",
" Trump share: {fmt_number(row.election_trump_share_pct, 1, suffix='%')}<br>\n",
" Trump margin: {fmt_number(row.election_trump_margin_pct, 1, suffix=' pp')}<br>\n",
" Join method: {clean_value(row.election_join_method)}<br>\n",
" '''\n",
"\n",
" return folium.Popup(f'''\n",
" <div style=\"font-family: system-ui, sans-serif; min-width: 310px; max-width: 420px;\">\n",
" <strong>{title}</strong><br>\n",
@@ -278,6 +643,11 @@
" {huc8_lines}\n",
" {ruca_lines}\n",
" {energy_lines}\n",
" {drought_lines}\n",
" {smoke_lines}\n",
" {climate_lines}\n",
" {broadband_lines}\n",
" {election_lines}\n",
" </div>\n",
" ''', max_width=460)\n",
"\n",
@@ -347,12 +717,101 @@
" IM3 avg siting score: {fmt_number(row.im3_avg_weighted_siting_score, 3)}<br>\n",
" {seds_note}\n",
" </div>\n",
" ''', max_width=380)\n"
" ''', max_width=380)\n",
"\n",
"\n",
"def cable_style(_feature):\n",
" return {'color': INTERNET_CABLE_COLOR, 'weight': 1.6, 'opacity': 0.45}\n",
"\n",
"\n",
"def cable_popup(feature):\n",
" p = feature.get('properties', {})\n",
" return folium.Popup(f'''\n",
" <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
" <strong>{clean_value(p.get('name') or 'Internet cable')}</strong><br>\n",
" Owners: {clean_value(p.get('owners'))}<br>\n",
" Type: {clean_value(p.get('cable_type'))}<br>\n",
" RFS year: {fmt_number(p.get('rfs_year'))}<br>\n",
" Decommission year: {fmt_number(p.get('decommission_year'))}<br>\n",
" Length: {fmt_number(p.get('length_km'), 0, suffix=' km')}<br>\n",
" Feature ID: {clean_value(p.get('feature_id'))}\n",
" </div>\n",
" ''', max_width=380)\n",
"\n",
"\n",
"def opposition_popup(row):\n",
" return folium.Popup(f'''\n",
" <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
" <strong>Opposition case {fmt_number(row.id)}</strong><br>\n",
" Location: {clean_value(row.location)}<br>\n",
" State: {clean_value(row.state)}<br>\n",
" <hr style=\"margin: 6px 0;\">\n",
" Status: {clean_value(row.status)}<br>\n",
" Developer: {clean_value(row.developer)}<br>\n",
" Investment: {fmt_number(row.investment_billion, 2, prefix='$', suffix='B')}<br>\n",
" Opposition type: {clean_value(row.opposition_type)}<br>\n",
" Commons type: {clean_value(row.commons_type)}<br>\n",
" Governance response: {clean_value(row.governance_response)}<br>\n",
" Outcome: {clean_value(row.outcome)}<br>\n",
" Source: {clean_value(row.data_source)}\n",
" </div>\n",
" ''', max_width=400)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10",
"metadata": {},
"outputs": [],
"source": [
"def add_overlay_legend(map_obj: folium.Map) -> None:\n",
" legend_html = \"\"\"\n",
" <div style=\"\n",
" position: fixed;\n",
" bottom: 30px;\n",
" left: 30px;\n",
" z-index: 9999;\n",
" background: rgba(255, 255, 255, 0.96);\n",
" border: 1px solid #d1d5db;\n",
" border-radius: 8px;\n",
" box-shadow: 0 2px 8px rgba(0,0,0,0.15);\n",
" padding: 10px 12px;\n",
" font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif;\n",
" font-size: 12px;\n",
" line-height: 1.35;\n",
" min-width: 260px;\n",
" \">\n",
" <div style=\"font-weight: 700; margin-bottom: 6px;\">Overlay Legend</div>\n",
"\n",
" <div style=\"font-weight: 600; margin-top: 4px;\">Climate (mean summer temperature)</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#0284c7;margin-right:6px;\"></span>&lt; 23 C</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#f59e0b;margin-right:6px;\"></span>23-25.9 C</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#ea580c;margin-right:6px;\"></span>26-28.9 C</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#b91c1c;margin-right:6px;\"></span>29-31.9 C</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>&gt;= 32 C</div>\n",
"\n",
" <div style=\"font-weight: 600; margin-top: 6px;\">Broadband (FCC provider count)</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#b45309;margin-right:6px;\"></span>0-1</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#ca8a04;margin-right:6px;\"></span>2-4</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#65a30d;margin-right:6px;\"></span>5-9</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#16a34a;margin-right:6px;\"></span>10-19</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#166534;margin-right:6px;\"></span>&gt;= 20</div>\n",
"\n",
" <div style=\"font-weight: 600; margin-top: 6px;\">Election (Trump margin, pp)</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#1e3a8a;margin-right:6px;\"></span>&lt;= -20</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#2563eb;margin-right:6px;\"></span>-19.9 to -5</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#6b7280;margin-right:6px;\"></span>-4.9 to 4.9</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#dc2626;margin-right:6px;\"></span>5 to 19.9</div>\n",
" <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>&gt;= 20</div>\n",
" </div>\n",
" \"\"\"\n",
" map_obj.get_root().html.add_child(folium.Element(legend_html))"
]
},
{
"cell_type": "markdown",
"id": "8",
"id": "11",
"metadata": {},
"source": [
"## Build The Map"
@@ -361,7 +820,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"id": "12",
"metadata": {},
"outputs": [],
"source": [
@@ -373,6 +832,11 @@
"\n",
" huc8_layer = folium.FeatureGroup(name='HUC8 watersheds with data centers', show=False)\n",
" state_energy_layer = folium.FeatureGroup(name='State energy demand context (IM3 / SEDS)', show=False)\n",
" cables_layer = folium.FeatureGroup(name='Internet cable network', show=False)\n",
" opposition_layer = folium.FeatureGroup(name='Opposition cases', show=False)\n",
" climate_layer = folium.FeatureGroup(name='Climate stress context', show=False)\n",
" broadband_layer = folium.FeatureGroup(name='Broadband capacity context', show=False)\n",
" election_layer = folium.FeatureGroup(name='Election context (2020 precinct match)', show=False)\n",
" clustered_layer = folium.FeatureGroup(name='Data centers: clustered', show=True)\n",
" noise_layer = folium.FeatureGroup(name='Data centers: noise / isolated', show=True)\n",
" centroid_layer = folium.FeatureGroup(name='Cluster centroids and p90 radius', show=True)\n",
@@ -408,6 +872,90 @@
" tooltip=f'{row.state_code}: IM3 {fmt_number(power, suffix=\" MW\")}',\n",
" ).add_to(state_energy_layer)\n",
"\n",
" if SHOW_INTERNET_CABLES_LAYER and internet_cables_geojson is not None:\n",
" folium.GeoJson(\n",
" internet_cables_geojson,\n",
" name='Internet cable network',\n",
" style_function=cable_style,\n",
" highlight_function=lambda _f: {'weight': 3.0, 'opacity': 0.85},\n",
" popup=cable_popup,\n",
" tooltip=folium.GeoJsonTooltip(\n",
" fields=['name', 'cable_type', 'rfs_year'],\n",
" aliases=['Cable', 'Type', 'RFS year'],\n",
" localize=True,\n",
" sticky=False,\n",
" ),\n",
" ).add_to(cables_layer)\n",
"\n",
" if SHOW_OPPOSITION_CASES_LAYER and not opposition_cases.empty:\n",
" for row in opposition_cases.itertuples(index=False):\n",
" marker_radius = 5 if pd.isna(row.investment_billion) else max(5, min(14, 4 + float(row.investment_billion) ** 0.5 * 2.2))\n",
" folium.CircleMarker(\n",
" location=[row.lat, row.lon],\n",
" radius=marker_radius,\n",
" color='#7f1d1d',\n",
" fill=True,\n",
" fill_color=OPPOSITION_CASE_COLOR,\n",
" fill_opacity=0.75,\n",
" weight=1.2,\n",
" popup=opposition_popup(row),\n",
" tooltip=f\"Opposition case: {row.state} ({clean_value(row.status)})\",\n",
" ).add_to(opposition_layer)\n",
"\n",
" if SHOW_CLIMATE_LAYER:\n",
" climate_rows = points_df.dropna(subset=['mean_summer_temperature_c']) if 'mean_summer_temperature_c' in points_df.columns else pd.DataFrame()\n",
" for row in climate_rows.itertuples(index=False):\n",
" color = climate_color(row.mean_summer_temperature_c)\n",
" radius = max(4, min(12, 3 + (float(row.extreme_heat_days) if pd.notna(row.extreme_heat_days) else 0.0) ** 0.5 / 2.0))\n",
" folium.CircleMarker(\n",
" location=[row.latitude, row.longitude],\n",
" radius=radius,\n",
" color=color,\n",
" fill=True,\n",
" fill_color=color,\n",
" fill_opacity=0.35,\n",
" weight=1,\n",
" tooltip=f\"Climate: summer {fmt_number(row.mean_summer_temperature_c, 1, suffix=' C')}; heat days {fmt_number(row.extreme_heat_days)}\",\n",
" ).add_to(climate_layer)\n",
"\n",
" if SHOW_BROADBAND_LAYER:\n",
" bb_rows = points_df.dropna(subset=['fcc_provider_count']) if 'fcc_provider_count' in points_df.columns else pd.DataFrame()\n",
" for row in bb_rows.itertuples(index=False):\n",
" color = broadband_color(row.fcc_provider_count)\n",
" speed = float(row.fcc_max_advertised_download_mbps) if pd.notna(row.fcc_max_advertised_download_mbps) else 0.0\n",
" radius = max(4, min(12, 4 + speed ** 0.5 / 10.0))\n",
" folium.CircleMarker(\n",
" location=[row.latitude, row.longitude],\n",
" radius=radius,\n",
" color=color,\n",
" fill=True,\n",
" fill_color=color,\n",
" fill_opacity=0.3,\n",
" weight=1,\n",
" tooltip=f\"Broadband: providers {fmt_number(row.fcc_provider_count)}; max down {fmt_number(row.fcc_max_advertised_download_mbps, 0, suffix=' Mbps')}\",\n",
" ).add_to(broadband_layer)\n",
"\n",
" if SHOW_ELECTION_LAYER and not election_context.empty:\n",
" for row in election_context.dropna(subset=['election_latitude', 'election_longitude']).itertuples(index=False):\n",
" margin = getattr(row, 'election_trump_margin_pct')\n",
" color = election_color(margin)\n",
" radius = max(4, min(11, 4 + abs(float(margin)) / 8.0)) if pd.notna(margin) else 5\n",
" tip = (\n",
" f\"Election precinct: {row.election_state_code} {clean_value(row.election_locality)}; \"\n",
" f\"Biden {fmt_number(row.election_biden_share_pct, 1, suffix='%')} / \"\n",
" f\"Trump {fmt_number(row.election_trump_share_pct, 1, suffix='%')}\"\n",
" )\n",
" folium.CircleMarker(\n",
" location=[row.election_latitude, row.election_longitude],\n",
" radius=radius,\n",
" color=color,\n",
" fill=True,\n",
" fill_color=color,\n",
" fill_opacity=0.4,\n",
" weight=1,\n",
" tooltip=tip,\n",
" ).add_to(election_layer)\n",
"\n",
" bounds = []\n",
" for row in points_df.itertuples(index=False):\n",
" cluster_label, cluster_size, _ = cluster_label_and_size(row.cluster_id)\n",
@@ -453,6 +1001,11 @@
"\n",
" huc8_layer.add_to(m)\n",
" state_energy_layer.add_to(m)\n",
" cables_layer.add_to(m)\n",
" opposition_layer.add_to(m)\n",
" climate_layer.add_to(m)\n",
" broadband_layer.add_to(m)\n",
" election_layer.add_to(m)\n",
" clustered_layer.add_to(m)\n",
" noise_layer.add_to(m)\n",
" centroid_layer.add_to(m)\n",
@@ -468,7 +1021,7 @@
},
{
"cell_type": "markdown",
"id": "10",
"id": "13",
"metadata": {},
"source": [
"## Export HTML"
@@ -477,7 +1030,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"id": "14",
"metadata": {},
"outputs": [],
"source": [
@@ -487,7 +1040,7 @@
},
{
"cell_type": "markdown",
"id": "12",
"id": "15",
"metadata": {},
"source": [
"## Feature Staging Area\n",

File diff suppressed because one or more lines are too long