1578 lines
82 KiB
Plaintext
1578 lines
82 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Enhanced Data Center Cluster Map\n",
|
|
"\n",
|
|
"This notebook starts from the spatial clustering outputs created by `spatial_clustering_master_data_centers.ipynb` and adds contextual layers from the demographic/RUCA/energy analysis.\n",
|
|
"\n",
|
|
"Current features:\n",
|
|
"- Loads point and cluster summary CSVs from `output/`.\n",
|
|
"- Recreates the cluster-colored Folium map.\n",
|
|
"- Enriches point popups with HUC8 watershed, RUCA, tract demographics, and state energy context where available.\n",
|
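|
"- Adds separate layers for clustered points, isolated/noise points, cluster centroids, HUC8 watersheds, state IM3 projected demand, EIA generator capacity, and utility-rate context, plus optional DB-backed overlays (internet cables, opposition cases, climate, broadband, election, and FEMA NRI) when database credentials are available.\n",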
|
|
"- Saves a standalone HTML map to `output/enhanced_master_data_center_spatial_clusters_map.html`.\n",
|
|
"\n",
|
|
"Notes from `output/data_center_demographic_ruca_energy_summary.md`:\n",
|
|
"- HUC8 watershed join is a recommended next step for water-context analysis.\n",
|
|
"- `im3_state_projected_moderate_50` is populated and used for state projected demand context.\n",
|
|
"- `seds_state_msn_year` is checked through the state context export, but it currently has no rows, so SEDS fields are blank until that table is populated.\n",
|
|
"- EIA generator capacity uses the latest available period in `public.energy_eia_operating_generator_capacity_flat`; utility-rate context uses `public.utility_rate_tracker_2025_2028`.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import json\n",
|
|
"import subprocess\n",
|
|
"from html import escape\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"os.environ.setdefault('MPLCONFIGDIR', '/tmp/matplotlib')\n",
|
|
"Path(os.environ['MPLCONFIGDIR']).mkdir(parents=True, exist_ok=True)\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"import folium\n",
|
|
"import psycopg2\n",
|
|
"from folium import plugins\n",
|
|
"\n",
|
|
"print('pandas:', pd.__version__)\n",
|
|
"print('folium:', folium.__version__)\n",
|
|
"print('psycopg2:', psycopg2.__version__)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "2",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Paths And Display Settings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# --- Input / output files (relative to the notebook's working directory) ---\n",
"OUTPUT_DIR = Path('output')\n",
"POINTS_CSV = OUTPUT_DIR / 'master_data_center_spatial_cluster_points.csv'\n",
"CLUSTERS_CSV = OUTPUT_DIR / 'master_data_center_spatial_cluster_summary.csv'\n",
"POINT_CONTEXT_CSV = OUTPUT_DIR / 'master_data_center_map_context.csv'\n",
"HUC8_GEOJSON = OUTPUT_DIR / 'master_data_center_huc8_watersheds.geojson'\n",
"STATE_ENERGY_CSV = OUTPUT_DIR / 'master_data_center_state_energy_context.csv'\n",
"MAP_HTML = OUTPUT_DIR / 'enhanced_master_data_center_spatial_clusters_map.html'\n",
"\n",
"# --- Base map view ---\n",
"MAP_CENTER = [39, -98]  # presumably [latitude, longitude]; confirm where the map is built\n",
"MAP_ZOOM = 4\n",
"BASE_TILES = 'CartoDB positron'\n",
"\n",
"# None loads every point; an integer keeps only the first N rows of the points CSV.\n",
"MAX_POINTS = None\n",
"\n",
"# --- Marker sizes and CSV/GeoJSON-backed layer toggles ---\n",
"CLUSTERED_RADIUS = 5\n",
"NOISE_RADIUS = 3\n",
"CENTROID_RADIUS = 7\n",
"SHOW_CENTROID_P90_CIRCLES = True\n",
"SHOW_HUC8_LAYER = True\n",
"SHOW_STATE_ENERGY_LAYER = True\n",
"\n",
"# --- Existing DB-backed overlays ---\n",
"# ENABLE_DB_LAYER_LOAD is the master switch checked by load_optional_db_layers().\n",
"ENABLE_DB_LAYER_LOAD = True\n",
"SHOW_INTERNET_CABLES_LAYER = True\n",
"SHOW_OPPOSITION_CASES_LAYER = True\n",
"SHOW_DROUGHT_AND_SMOKE_CONTEXT = True\n",
"\n",
"# --- New requested overlays ---\n",
"SHOW_CLIMATE_LAYER = True\n",
"SHOW_BROADBAND_LAYER = True\n",
"SHOW_ELECTION_LAYER = True\n",
"SHOW_ELECTION_2020_LAYER = True\n",
"SHOW_ELECTION_2024_LAYER = False\n",
"SHOW_NRI_LAYER = True\n",
"SHOW_EIA_GENERATOR_CAPACITY_LAYER = True\n",
"EIA_GENERATOR_PERIOD = None  # None uses the latest available EIA period.\n",
"MAX_EIA_GENERATOR_PLANTS = 2000  # SQL LIMIT applied after ordering plants by nameplate MW\n",
"SHOW_UTILITY_RATE_TRACKER_LAYER = True\n",
"\n",
"OUTPUT_DIR.mkdir(exist_ok=True)\n",
"\n",
"# Echo the resolved paths so a reader can see which files this run uses.\n",
"for _label, _path in (\n",
"    ('points', POINTS_CSV),\n",
"    ('clusters', CLUSTERS_CSV),\n",
"    ('point context', POINT_CONTEXT_CSV),\n",
"    ('HUC8 GeoJSON', HUC8_GEOJSON),\n",
"    ('state energy context', STATE_ENERGY_CSV),\n",
"    ('html output', MAP_HTML),\n",
"):\n",
"    print(f'{_label}:', _path)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load Cluster Outputs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# The two cluster exports are mandatory; every other input is optional context.\n",
"required_files = [POINTS_CSV, CLUSTERS_CSV]\n",
"missing = [str(p) for p in required_files if not p.exists()]\n",
"if missing:\n",
"    raise FileNotFoundError('Missing required cluster output CSV(s): ' + ', '.join(missing))\n",
"\n",
"points = pd.read_csv(POINTS_CSV)\n",
"clusters = pd.read_csv(CLUSTERS_CSV)\n",
"# Optional exports fall back to empty frames so the `.empty` checks below skip them.\n",
"point_context = pd.read_csv(POINT_CONTEXT_CSV) if POINT_CONTEXT_CSV.exists() else pd.DataFrame()\n",
"state_energy = pd.read_csv(STATE_ENERGY_CSV) if STATE_ENERGY_CSV.exists() else pd.DataFrame()\n",
"\n",
"if MAX_POINTS is not None:\n",
"    points = points.head(MAX_POINTS).copy()\n",
"\n",
"# Missing or unparseable cluster ids are treated as noise (-1).\n",
"points['cluster_id'] = pd.to_numeric(points['cluster_id'], errors='coerce').fillna(-1).astype(int)\n",
"points['is_noise'] = points['cluster_id'].eq(-1)\n",
"points['is_clustered'] = ~points['is_noise']\n",
"points['name'] = points['name'].fillna('')\n",
"points['operator'] = points['operator'].fillna('Unknown').replace('', 'Unknown')\n",
"points['city'] = points['city'].fillna('Unknown').replace('', 'Unknown')\n",
"points['state'] = points['state'].fillna('UNK').replace('', 'UNK')\n",
"points['source'] = points['source'].fillna('unknown').replace('', 'unknown')\n",
"\n",
"if not point_context.empty:\n",
"    context_cols = [c for c in point_context.columns if c != 'master_id']\n",
"    # Keep one context row per master_id: duplicate keys would multiply point rows in the left join.\n",
"    point_context_unique = point_context.drop_duplicates(subset='master_id')\n",
"    points = points.merge(point_context_unique[['master_id'] + context_cols], on='master_id', how='left')\n",
"\n",
"if not state_energy.empty:\n",
"    state_cols = [c for c in state_energy.columns if c != 'state_code']\n",
"    # Same guard for the state join; `state_energy` itself is left untouched for later cells.\n",
"    state_energy_unique = state_energy.drop_duplicates(subset='state_code')\n",
"    points = points.merge(\n",
"        state_energy_unique[['state_code'] + state_cols],\n",
"        left_on='state',\n",
"        right_on='state_code',\n",
"        how='left',\n",
"    )\n",
"\n",
"# astype(int) raises if the summary holds a non-numeric cluster id, which surfaces a bad export early.\n",
"clusters['cluster_id'] = pd.to_numeric(clusters['cluster_id'], errors='coerce').astype(int)\n",
"# Rank 1 = most points; ties are broken by the smaller p90 radius.\n",
"clusters = clusters.sort_values(['point_count', 'radius_km_p90'], ascending=[False, True]).reset_index(drop=True)\n",
"clusters['cluster_rank'] = clusters.index + 1\n",
"\n",
"huc8_geojson = None\n",
"if HUC8_GEOJSON.exists():\n",
"    # Read explicitly as UTF-8 so the result does not depend on the platform default encoding.\n",
"    huc8_geojson = json.loads(HUC8_GEOJSON.read_text(encoding='utf-8'))\n",
"\n",
"n_clusters = points.loc[points['cluster_id'].ne(-1), 'cluster_id'].nunique()\n",
"print(f'Loaded {len(points):,} points and {n_clusters:,} clusters')\n",
"print('point context columns:', 0 if point_context.empty else len(point_context.columns))\n",
"print('HUC8 features:', 0 if huc8_geojson is None else len(huc8_geojson.get('features', [])))\n",
"if not state_energy.empty:\n",
"    seds_available = state_energy['seds_series_count'].notna().sum() if 'seds_series_count' in state_energy.columns else 0\n",
"    print(f'state energy rows: {len(state_energy):,}; SEDS rows represented: {seds_available:,}')\n",
"elif STATE_ENERGY_CSV.exists():\n",
"    # The file is present but has no rows; do not report it as missing.\n",
"    print('state energy context file has no rows')\n",
"else:\n",
"    print('state energy context file not found')\n",
"display(points.head())\n",
"display(clusters.head(10))\n",
"if not state_energy.empty:\n",
"    display(state_energy.head(10))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Connection target; host, port and credentials come from the PGWEB_* environment variables.\n",
"DB_NAME = 'data_centers'\n",
"DB_REQUIRED_ENV = ['PGWEB_HOST', 'PGWEB_PORT', 'PGWEB_USER', 'PGWEB_PASSWORD']\n",
"\n",
"# Overlay containers start empty so they are always defined, even when the DB load is skipped.\n",
"internet_cables_geojson = None\n",
"(\n",
"    opposition_cases,\n",
"    drought_context,\n",
"    smoke_context,\n",
"    climate_context,\n",
"    broadband_context,\n",
"    election_context,\n",
"    nri_context,\n",
"    generator_capacity_plants,\n",
"    utility_rate_tracker,\n",
"    utility_rate_state_context,\n",
") = (pd.DataFrame() for _ in range(10))  # ten independent empty frames, one per name above\n",
|
|
"\n",
|
|
"\n",
|
"def load_zsh_secrets() -> None:\n",
"    \"\"\"Best-effort import of variables exported by ``~/.zsh_secrets`` into ``os.environ``.\n",
"\n",
"    Sources the file in a zsh login shell, dumps the resulting environment with\n",
"    ``env`` and copies over every variable that is not already set in this\n",
"    process (existing values win). Does nothing when the file is absent.\n",
"\n",
"    Failures (zsh not installed, the shell exiting non-zero, or a timeout) are\n",
"    reported and swallowed so the notebook can still build the map from the\n",
"    CSV/GeoJSON sources instead of stopping here.\n",
"    \"\"\"\n",
"    secrets = Path.home() / '.zsh_secrets'\n",
"    if not secrets.exists():\n",
"        return\n",
"    try:\n",
"        result = subprocess.run(\n",
"            ['zsh', '-lc', 'source ~/.zsh_secrets >/dev/null 2>&1; env'],\n",
"            check=True,\n",
"            capture_output=True,\n",
"            text=True,\n",
"            timeout=30,  # a hung login shell must not block the notebook indefinitely\n",
"        )\n",
"    except (OSError, subprocess.SubprocessError) as exc:\n",
"        # Only the exception type is printed, so nothing from the secrets file can leak into cell output.\n",
"        print(f'Skipping ~/.zsh_secrets: could not load it via zsh ({type(exc).__name__})')\n",
"        return\n",
"    for line in result.stdout.splitlines():\n",
"        key, sep, value = line.partition('=')\n",
"        # `env` prints multi-line values across several lines; requiring an identifier-like\n",
"        # name keeps their continuation lines from being imported as bogus variables.\n",
"        if sep and key.isidentifier() and key not in os.environ:\n",
"            os.environ[key] = value\n",
|
|
"\n",
|
|
"\n",
|
"def db_ready() -> bool:\n",
"    \"\"\"Return True when every variable named in ``DB_REQUIRED_ENV`` is set and non-empty.\"\"\"\n",
"    unset = [name for name in DB_REQUIRED_ENV if not os.getenv(name)]\n",
"    return not unset\n",
|
|
"\n",
|
|
"\n",
|
"def get_conn():\n",
"    \"\"\"Open a new psycopg2 connection to ``DB_NAME`` using the ``PGWEB_*`` variables.\n",
"\n",
"    Raises ``KeyError`` when a variable is unset; check ``db_ready()`` first.\n",
"    The caller owns the connection and is responsible for closing it.\n",
"    \"\"\"\n",
"    env = os.environ\n",
"    settings = {\n",
"        'host': env['PGWEB_HOST'],\n",
"        'port': env['PGWEB_PORT'],\n",
"        'user': env['PGWEB_USER'],\n",
"        'password': env['PGWEB_PASSWORD'],\n",
"        'dbname': DB_NAME,\n",
"    }\n",
"    return psycopg2.connect(**settings)\n",
|
|
"\n",
|
|
"\n",
|
"def _merge_point_context(frame, context, left_on='master_id', right_on=None):\n",
"    \"\"\"Left-join ``context`` onto ``frame`` without duplicating columns on re-runs.\n",
"\n",
"    Columns of ``context`` that already exist on ``frame`` (other than the\n",
"    ``left_on`` key) are dropped from ``frame`` first, so running the loader a\n",
"    second time refreshes them instead of producing pandas' ``_x``/``_y``\n",
"    suffixed copies. An empty ``context`` returns ``frame`` unchanged.\n",
"    \"\"\"\n",
"    if context.empty:\n",
"        return frame\n",
"    stale = [c for c in context.columns if c != left_on and c in frame.columns]\n",
"    keys = {'on': left_on} if right_on in (None, left_on) else {'left_on': left_on, 'right_on': right_on}\n",
"    return frame.drop(columns=stale).merge(context, how='left', **keys)\n",
"\n",
"\n",
"def load_optional_db_layers() -> None:\n",
"    \"\"\"Fill the module-level overlay variables from PostGIS and enrich ``points``.\n",
"\n",
"    Returns early, leaving every overlay at its current value, when\n",
"    ``ENABLE_DB_LAYER_LOAD`` is false or a ``PGWEB_*`` variable is missing.\n",
"    Otherwise each ``SHOW_*`` flag gates one group of queries: per-point tables\n",
"    are left-joined onto ``points`` by ``master_id`` and the state utility-rate\n",
"    rollup is joined on the point's ``state``. Query errors propagate to the\n",
"    caller; the connection is closed either way.\n",
"    \"\"\"\n",
"    global internet_cables_geojson, opposition_cases, drought_context, smoke_context\n",
"    global climate_context, broadband_context, election_context, nri_context, points\n",
"    global generator_capacity_plants, utility_rate_tracker, utility_rate_state_context\n",
"\n",
"    if not ENABLE_DB_LAYER_LOAD:\n",
"        print('DB layer load disabled')\n",
"        return\n",
"\n",
"    load_zsh_secrets()\n",
"    if not db_ready():\n",
"        print('Skipping DB-backed layers: missing PGWEB_* environment variables')\n",
"        return\n",
"\n",
"    # psycopg2's `with conn:` only ends the transaction and leaves the connection open,\n",
"    # so the connection is closed explicitly once all queries have run.\n",
"    conn = get_conn()\n",
"    try:\n",
"        if SHOW_INTERNET_CABLES_LAYER:\n",
"            # The whole table is returned as a single GeoJSON FeatureCollection value.\n",
"            cable_sql = \"\"\"\n",
"                select json_build_object(\n",
"                    'type','FeatureCollection',\n",
"                    'features', coalesce(json_agg(\n",
"                        json_build_object(\n",
"                            'type','Feature',\n",
"                            'geometry', ST_AsGeoJSON(geom)::json,\n",
"                            'properties', json_build_object(\n",
"                                'feature_id', feature_id,\n",
"                                'name', name,\n",
"                                'owners', owners,\n",
"                                'rfs_year', rfs_year,\n",
"                                'decommission_year', decommission_year,\n",
"                                'length_km', length_km,\n",
"                                'cable_type', cable_type\n",
"                            )\n",
"                        )\n",
"                    ), '[]'::json)\n",
"                ) as fc\n",
"                from public.internet_cables\n",
"                where geom is not null\n",
"            \"\"\"\n",
"            internet_cables_geojson = pd.read_sql(cable_sql, conn).iloc[0]['fc']\n",
"            n_cables = len(internet_cables_geojson.get('features', [])) if internet_cables_geojson else 0\n",
"            print(f'internet_cables features: {n_cables:,}')\n",
"\n",
"        if SHOW_OPPOSITION_CASES_LAYER:\n",
"            opposition_sql = \"\"\"\n",
"                select\n",
"                    id, location, state, lat, lon, investment_billion, status,\n",
"                    developer, commons_type, governance_response, outcome, opposition_type, data_source\n",
"                from public.opposition_cases_geocoded\n",
"                where lat is not null and lon is not null\n",
"            \"\"\"\n",
"            opposition_cases = pd.read_sql(opposition_sql, conn)\n",
"            print(f'opposition_cases rows: {len(opposition_cases):,}')\n",
"\n",
"        if SHOW_EIA_GENERATOR_CAPACITY_LAYER:\n",
"            # One row per plant for the selected period, with the largest energy source by\n",
"            # nameplate MW as the primary source and a text summary of the full source mix.\n",
"            generator_sql = \"\"\"\n",
"                with selected_period as (\n",
"                    select coalesce(%(period)s::text, max(period)) as period\n",
"                    from public.energy_eia_operating_generator_capacity_flat\n",
"                ),\n",
"                latest_generators as (\n",
"                    select g.*\n",
"                    from public.energy_eia_operating_generator_capacity_flat g\n",
"                    join selected_period sp on g.period = sp.period\n",
"                    where g.geom is not null\n",
"                      and g.latitude is not null\n",
"                      and g.longitude is not null\n",
"                ),\n",
"                source_capacity as (\n",
"                    select\n",
"                        plant_id,\n",
"                        energy_source_code,\n",
"                        max(energy_source_desc) as energy_source_desc,\n",
"                        sum(coalesce(nameplate_capacity_mw, 0)) as nameplate_capacity_mw,\n",
"                        count(*) as generator_count\n",
"                    from latest_generators\n",
"                    group by plant_id, energy_source_code\n",
"                ),\n",
"                source_rank as (\n",
"                    select\n",
"                        *,\n",
"                        row_number() over (\n",
"                            partition by plant_id\n",
"                            order by nameplate_capacity_mw desc nulls last, energy_source_code nulls last\n",
"                        ) as rn\n",
"                    from source_capacity\n",
"                ),\n",
"                source_mix as (\n",
"                    select\n",
"                        plant_id,\n",
"                        string_agg(\n",
"                            coalesce(energy_source_code, 'UNK') || ': ' ||\n",
"                            round(nameplate_capacity_mw::numeric, 1)::text || ' MW',\n",
"                            ', ' order by nameplate_capacity_mw desc nulls last\n",
"                        ) as energy_source_mix\n",
"                    from source_capacity\n",
"                    group by plant_id\n",
"                ),\n",
"                plant_capacity as (\n",
"                    select\n",
"                        lg.period,\n",
"                        lg.plant_id,\n",
"                        max(lg.plant_name) as plant_name,\n",
"                        max(lg.state_id) as state_id,\n",
"                        max(lg.state_name) as state_name,\n",
"                        string_agg(distinct nullif(lg.entity_name, ''), '; ') as entity_names,\n",
"                        max(lg.balancing_authority_code) as balancing_authority_code,\n",
"                        max(lg.balancing_authority_name) as balancing_authority_name,\n",
"                        avg(lg.latitude) as latitude,\n",
"                        avg(lg.longitude) as longitude,\n",
"                        sum(coalesce(lg.nameplate_capacity_mw, 0)) as nameplate_capacity_mw,\n",
"                        sum(coalesce(lg.net_summer_capacity_mw, 0)) as net_summer_capacity_mw,\n",
"                        sum(coalesce(lg.net_winter_capacity_mw, 0)) as net_winter_capacity_mw,\n",
"                        count(*) as generator_count\n",
"                    from latest_generators lg\n",
"                    group by lg.period, lg.plant_id\n",
"                )\n",
"                select\n",
"                    pc.*,\n",
"                    sr.energy_source_code as primary_energy_source_code,\n",
"                    sr.energy_source_desc as primary_energy_source_desc,\n",
"                    sm.energy_source_mix\n",
"                from plant_capacity pc\n",
"                left join source_rank sr on sr.plant_id = pc.plant_id and sr.rn = 1\n",
"                left join source_mix sm on sm.plant_id = pc.plant_id\n",
"                where pc.nameplate_capacity_mw > 0\n",
"                order by pc.nameplate_capacity_mw desc nulls last\n",
"                limit %(limit)s\n",
"            \"\"\"\n",
"            generator_capacity_plants = pd.read_sql(\n",
"                generator_sql,\n",
"                conn,\n",
"                params={'period': EIA_GENERATOR_PERIOD, 'limit': MAX_EIA_GENERATOR_PLANTS},\n",
"            )\n",
"            # Report the period actually returned; fall back to the configured one when no rows came back.\n",
"            period_label = (\n",
"                generator_capacity_plants['period'].iloc[0]\n",
"                if not generator_capacity_plants.empty and 'period' in generator_capacity_plants\n",
"                else EIA_GENERATOR_PERIOD\n",
"            )\n",
"            print(\n",
"                f'eia generator capacity plants: {len(generator_capacity_plants):,} '\n",
"                f'(period {period_label}; top {MAX_EIA_GENERATOR_PLANTS:,} by nameplate MW)'\n",
"            )\n",
"\n",
"        if SHOW_UTILITY_RATE_TRACKER_LAYER:\n",
"            utility_rate_sql = \"\"\"\n",
"                select\n",
"                    utility_provider, state_name, state_code as utility_state_code, state_id,\n",
"                    service_type, customer_count, total_revenue_increase_2025_2028,\n",
"                    time_period, monthly_increase_amount, monthly_pct_increase_ratio,\n",
"                    effective_date, effective_date_raw, status, source_file\n",
"                from public.utility_rate_tracker_2025_2028\n",
"                order by state_code, utility_provider, service_type, effective_date\n",
"            \"\"\"\n",
"            # Per-state rollup plus the three utilities with the largest revenue increase.\n",
"            utility_rate_state_sql = \"\"\"\n",
"                with state_rollup as (\n",
"                    select\n",
"                        state_code as utility_state_code,\n",
"                        max(state_name) as utility_state_name,\n",
"                        count(*) as utility_rate_case_count,\n",
"                        count(distinct utility_provider) as utility_rate_provider_count,\n",
"                        count(*) filter (where lower(coalesce(service_type, '')) like 'electric%%') as utility_rate_electric_case_count,\n",
"                        count(*) filter (where lower(coalesce(service_type, '')) like '%%gas%%') as utility_rate_gas_case_count,\n",
"                        sum(coalesce(customer_count, 0)) as utility_rate_customer_count,\n",
"                        sum(coalesce(total_revenue_increase_2025_2028, 0)) as utility_rate_total_revenue_increase_2025_2028,\n",
"                        avg(monthly_increase_amount) as utility_rate_avg_monthly_increase_amount,\n",
"                        avg(monthly_pct_increase_ratio) as utility_rate_avg_monthly_pct_increase_ratio,\n",
"                        min(effective_date) as utility_rate_first_effective_date,\n",
"                        max(effective_date) as utility_rate_last_effective_date\n",
"                    from public.utility_rate_tracker_2025_2028\n",
"                    group by state_code\n",
"                ),\n",
"                ranked_utilities as (\n",
"                    select\n",
"                        state_code as utility_state_code,\n",
"                        utility_provider,\n",
"                        service_type,\n",
"                        total_revenue_increase_2025_2028,\n",
"                        row_number() over (\n",
"                            partition by state_code\n",
"                            order by total_revenue_increase_2025_2028 desc nulls last, utility_provider\n",
"                        ) as rn\n",
"                    from public.utility_rate_tracker_2025_2028\n",
"                ),\n",
"                top_utilities as (\n",
"                    select\n",
"                        utility_state_code,\n",
"                        string_agg(\n",
"                            coalesce(utility_provider, 'Unknown') || ' (' || coalesce(service_type, 'service') || ')',\n",
"                            '; ' order by rn\n",
"                        ) as utility_rate_top_utilities\n",
"                    from ranked_utilities\n",
"                    where rn <= 3\n",
"                    group by utility_state_code\n",
"                )\n",
"                select sr.*, tu.utility_rate_top_utilities\n",
"                from state_rollup sr\n",
"                left join top_utilities tu using (utility_state_code)\n",
"                order by utility_state_code\n",
"            \"\"\"\n",
"            utility_rate_tracker = pd.read_sql(utility_rate_sql, conn)\n",
"            utility_rate_state_context = pd.read_sql(utility_rate_state_sql, conn)\n",
"            print(f'utility_rate_tracker rows: {len(utility_rate_tracker):,}')\n",
"            print(f'utility_rate_state_context rows: {len(utility_rate_state_context):,}')\n",
"\n",
"            if not utility_rate_state_context.empty:\n",
"                # Borrow the state marker coordinates from the state energy export when it has them.\n",
"                if not state_energy.empty and {'state_code', 'map_latitude', 'map_longitude'}.issubset(state_energy.columns):\n",
"                    state_coords = state_energy[['state_code', 'map_latitude', 'map_longitude']].copy()\n",
"                    state_coords = state_coords.rename(columns={'state_code': 'utility_state_code'})\n",
"                    utility_rate_state_context = utility_rate_state_context.merge(\n",
"                        state_coords, on='utility_state_code', how='left'\n",
"                    )\n",
"\n",
"                # The coordinates stay on the state frame only; points get the rollup columns.\n",
"                cols = [\n",
"                    c for c in utility_rate_state_context.columns\n",
"                    if c not in {'utility_state_code', 'map_latitude', 'map_longitude'}\n",
"                ]\n",
"                points = _merge_point_context(\n",
"                    points,\n",
"                    utility_rate_state_context[['utility_state_code'] + cols],\n",
"                    left_on='state',\n",
"                    right_on='utility_state_code',\n",
"                )\n",
"\n",
"        if SHOW_DROUGHT_AND_SMOKE_CONTEXT:\n",
"            drought_sql = \"\"\"\n",
"                select\n",
"                    master_id, usdm_status, worst_dm_category, mean_dm_category,\n",
"                    pct_weeks_in_d2_or_worse, pct_weeks_in_d3_or_worse,\n",
"                    longest_d2_streak_weeks, longest_d3_streak_weeks\n",
"                from public.data_center_usdm_drought_exposure\n",
"            \"\"\"\n",
"            smoke_sql = \"\"\"\n",
"                select\n",
"                    master_id, hms_status, smoke_period_start, smoke_period_end,\n",
"                    days_observed, days_with_any_smoke, days_with_heavy_smoke,\n",
"                    pct_days_with_any_smoke, pct_days_with_heavy_smoke,\n",
"                    worst_density, mean_density_rank\n",
"                from public.data_center_hms_smoke_exposure\n",
"            \"\"\"\n",
"            drought_context = pd.read_sql(drought_sql, conn)\n",
"            smoke_context = pd.read_sql(smoke_sql, conn)\n",
"            print(f'drought_context rows: {len(drought_context):,}')\n",
"            print(f'smoke_context rows: {len(smoke_context):,}')\n",
"            points = _merge_point_context(points, drought_context)\n",
"            points = _merge_point_context(points, smoke_context)\n",
"\n",
"        if SHOW_CLIMATE_LAYER:\n",
"            climate_sql = \"\"\"\n",
"                select\n",
"                    master_id, mean_annual_temperature_c, mean_summer_temperature_c,\n",
"                    max_wet_bulb_temperature_c, extreme_heat_days,\n",
"                    annual_cooling_degree_days_c_mean, annual_precipitation_mm_mean\n",
"                from public.data_center_historical_climate\n",
"            \"\"\"\n",
"            climate_context = pd.read_sql(climate_sql, conn)\n",
"            print(f'climate_context rows: {len(climate_context):,}')\n",
"            points = _merge_point_context(points, climate_context)\n",
"\n",
"        if SHOW_BROADBAND_LAYER:\n",
"            broadband_sql = \"\"\"\n",
"                select\n",
"                    master_id, census_broadband_subscription_pct,\n",
"                    fcc_bdc_status, fcc_bdc_as_of_date,\n",
"                    fcc_provider_count, fcc_fiber_provider_count, fcc_cable_provider_count,\n",
"                    fcc_fixed_wireless_provider_count,\n",
"                    fcc_max_advertised_download_mbps, fcc_max_advertised_upload_mbps,\n",
"                    fcc_100_20_provider_count\n",
"                from public.data_center_broadband_connection\n",
"            \"\"\"\n",
"            broadband_context = pd.read_sql(broadband_sql, conn)\n",
"            print(f'broadband_context rows: {len(broadband_context):,}')\n",
"            points = _merge_point_context(points, broadband_context)\n",
"\n",
"        if SHOW_ELECTION_LAYER:\n",
"            # One precinct per data center: point-in-precinct matches win, then the nearest match.\n",
"            election_sql = \"\"\"\n",
"                with best_match as (\n",
"                    select distinct on (m.master_id)\n",
"                        m.master_id,\n",
"                        m.state_code as election_state_code,\n",
"                        m.join_method as election_join_method,\n",
"                        m.match_distance_m as election_match_distance_m,\n",
"                        f.feature_id, f.layer_id, f.properties,\n",
"                        ST_Y(ST_PointOnSurface(f.geom)) as election_latitude,\n",
"                        ST_X(ST_PointOnSurface(f.geom)) as election_longitude\n",
"                    from public.data_center_rdh_precinct_vote_matches m\n",
"                    join public.rdh_precinct_vote_features f\n",
"                        on f.feature_id = m.feature_id and f.layer_id = m.layer_id\n",
"                    where f.geom is not null\n",
"                    order by m.master_id,\n",
"                        case m.join_method when 'point_in_precinct' then 0 else 1 end,\n",
"                        m.match_distance_m asc nulls last\n",
"                )\n",
"                select\n",
"                    master_id, election_state_code, election_join_method, election_match_distance_m,\n",
"                    feature_id, layer_id as election_layer_id, election_latitude, election_longitude, properties,\n",
"                    coalesce((properties->>'LOCALITY'), '') as election_locality,\n",
"                    coalesce((properties->>'PRECINCT'), '') as election_precinct,\n",
"                    nullif(properties->>'G20PREDBID','')::double precision as election_2020_dem_votes,\n",
"                    nullif(properties->>'G20PRERTRU','')::double precision as election_2020_rep_votes\n",
"                from best_match\n",
"            \"\"\"\n",
"            election_context = pd.read_sql(election_sql, conn)\n",
"            if not election_context.empty:\n",
"                dem_votes = election_context['election_2020_dem_votes']\n",
"                rep_votes = election_context['election_2020_rep_votes']\n",
"                total_votes = dem_votes.fillna(0) + rep_votes.fillna(0)\n",
"                # A zero two-party total becomes NaN (the column stays float) so the shares\n",
"                # below come out as NaN instead of dividing by zero.\n",
"                total_votes = total_votes.mask(total_votes.eq(0))\n",
"                election_context['election_2020_total_votes'] = total_votes\n",
"\n",
"                election_context['election_2020_dem_share_pct'] = 100.0 * dem_votes / total_votes\n",
"                election_context['election_2020_rep_share_pct'] = 100.0 * rep_votes / total_votes\n",
"                election_context['election_2020_rep_margin_pct'] = (\n",
"                    election_context['election_2020_rep_share_pct'] - election_context['election_2020_dem_share_pct']\n",
"                )\n",
"\n",
"                # Candidate-named aliases of the 2020 columns above.\n",
"                election_context['election_biden_votes'] = election_context['election_2020_dem_votes']\n",
"                election_context['election_trump_votes'] = election_context['election_2020_rep_votes']\n",
"                election_context['election_biden_share_pct'] = election_context['election_2020_dem_share_pct']\n",
"                election_context['election_trump_share_pct'] = election_context['election_2020_rep_share_pct']\n",
"                election_context['election_trump_margin_pct'] = election_context['election_2020_rep_margin_pct']\n",
"                # The raw JSON properties were only needed by the SQL extraction.\n",
"                election_context = election_context.drop(columns=['properties'])\n",
"\n",
"            print(f'election_context rows: {len(election_context):,}')\n",
"            points = _merge_point_context(points, election_context)\n",
"\n",
"        if SHOW_NRI_LAYER:\n",
"            # FEMA National Risk Index (December 2025). Per-DC values come from the\n",
"            # census tract that contains the DC point. We pull composite scores plus\n",
"            # the per-hazard risk score for the 18 NRI hazards.\n",
"            nri_sql = \"\"\"\n",
"                select\n",
"                    master_id, nri_status, \"TRACTFIPS\" as nri_tractfips,\n",
"                    \"RISK_SCORE\" as nri_risk_score, \"RISK_RATNG\" as nri_risk_rating,\n",
"                    \"EAL_SCORE\" as nri_eal_score, \"EAL_RATNG\" as nri_eal_rating,\n",
"                    \"EAL_VALT\" as nri_eal_total_usd,\n",
"                    \"SOVI_SCORE\" as nri_sovi_score, \"SOVI_RATNG\" as nri_sovi_rating,\n",
"                    \"RESL_SCORE\" as nri_resl_score, \"RESL_RATNG\" as nri_resl_rating,\n",
"                    \"AVLN_RISKS\" as nri_avln_risk, \"CFLD_RISKS\" as nri_cfld_risk,\n",
"                    \"CWAV_RISKS\" as nri_cwav_risk, \"DRGT_RISKS\" as nri_drgt_risk,\n",
"                    \"ERQK_RISKS\" as nri_erqk_risk, \"HAIL_RISKS\" as nri_hail_risk,\n",
"                    \"HWAV_RISKS\" as nri_hwav_risk, \"HRCN_RISKS\" as nri_hrcn_risk,\n",
"                    \"ISTM_RISKS\" as nri_istm_risk, \"LNDS_RISKS\" as nri_lnds_risk,\n",
"                    \"LTNG_RISKS\" as nri_ltng_risk, \"IFLD_RISKS\" as nri_ifld_risk,\n",
"                    \"SWND_RISKS\" as nri_swnd_risk, \"TRND_RISKS\" as nri_trnd_risk,\n",
"                    \"TSUN_RISKS\" as nri_tsun_risk, \"VLCN_RISKS\" as nri_vlcn_risk,\n",
"                    \"WFIR_RISKS\" as nri_wfir_risk, \"WNTW_RISKS\" as nri_wntw_risk\n",
"                from public.data_center_nri_exposure\n",
"            \"\"\"\n",
"            nri_context = pd.read_sql(nri_sql, conn)\n",
"            print(f'nri_context rows: {len(nri_context):,}')\n",
"            points = _merge_point_context(points, nri_context)\n",
"    finally:\n",
"        conn.close()\n",
"\n",
"\n",
"load_optional_db_layers()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Optional DB-backed Layer Context\n",
|
|
"\n",
|
|
"The code cell above (`load_optional_db_layers()`) pulls these additional overlays directly from PostGIS:\n",
|
|
"- `public.internet_cables` (line layer)\n",
|
|
"- `public.opposition_cases_geocoded` (point layer)\n",
|
|
"- `public.data_center_usdm_drought_exposure` (point popup enrichment)\n",
|
|
"- `public.data_center_hms_smoke_exposure` (point popup enrichment)\n",
|
|
"- `public.data_center_historical_climate` (climate stress layer + popup)\n",
|
|
"- `public.data_center_broadband_connection` (broadband capacity layer + popup)\n",
|
|
"- `public.data_center_rdh_precinct_vote_matches` (election context layer + popup)\n",
|
|
"- `public.data_center_nri_exposure` (FEMA NRI multi-hazard risk layer + popup)\n",
|
|
"- `public.energy_eia_operating_generator_capacity_flat` (latest-period generator capacity plant layer)\n",
|
|
"- `public.utility_rate_tracker_2025_2028` (state utility-rate tracker layer + point popup enrichment)\n",
|
|
"\n",
|
|
"If DB credentials are unavailable, map generation still works with CSV/GeoJSON sources."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Map Helpers"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Palette cycled by cluster rank in cluster_color(); reused modulo its length.\n",
"CLUSTER_COLORS = [\n",
"    '#2563eb', '#dc2626', '#16a34a', '#9333ea',\n",
"    '#ea580c', '#0891b2', '#be123c', '#4f46e5',\n",
"    '#65a30d', '#c026d3', '#0f766e', '#b45309',\n",
"]\n",
"\n",
"# Fixed colors for the non-cluster layers.\n",
"NOISE_COLOR = '#9ca3af'\n",
"CENTROID_COLOR = '#111827'\n",
"STATE_ENERGY_COLOR = '#f59e0b'\n",
"INTERNET_CABLE_COLOR = '#7c3aed'\n",
"OPPOSITION_CASE_COLOR = '#b91c1c'\n",
"GENERATOR_CAPACITY_COLOR = '#15803d'\n",
"UTILITY_RATE_COLOR = '#0f766e'\n",
"\n",
"# NRI hazard prefix -> human-readable label, used in the per-DC popup.\n",
"# Each prefix matches an `nri_<prefix>_risk` column selected by the NRI query.\n",
"NRI_HAZARDS = [\n",
"    ('avln', 'Avalanche'),\n",
"    ('cfld', 'Coastal flood'),\n",
"    ('cwav', 'Cold wave'),\n",
"    ('drgt', 'Drought'),\n",
"    ('erqk', 'Earthquake'),\n",
"    ('hail', 'Hail'),\n",
"    ('hwav', 'Heat wave'),\n",
"    ('hrcn', 'Hurricane'),\n",
"    ('istm', 'Ice storm'),\n",
"    ('lnds', 'Landslide'),\n",
"    ('ltng', 'Lightning'),\n",
"    ('ifld', 'Inland flood'),\n",
"    ('swnd', 'Strong wind'),\n",
"    ('trnd', 'Tornado'),\n",
"    ('tsun', 'Tsunami'),\n",
"    ('vlcn', 'Volcanic activity'),\n",
"    ('wfir', 'Wildfire'),\n",
"    ('wntw', 'Winter weather'),\n",
"]\n",
"\n",
"# cluster_id -> {column: value} for each row of the cluster summary.\n",
"cluster_info = clusters.set_index('cluster_id').to_dict(orient='index')\n",
|
|
"\n",
|
|
"\n",
|
"def clean_value(value):\n",
"    \"\"\"Return ``value`` as HTML-escaped text, or '' when ``pd.isna(value)`` is true.\"\"\"\n",
"    return '' if pd.isna(value) else escape(str(value))\n",
|
|
"\n",
|
|
"\n",
|
"def fmt_number(value, decimals=0, prefix='', suffix=''):\n",
"    \"\"\"Format a number with thousands separators and a fixed number of decimals.\n",
"\n",
"    Missing values give ''. A value that ``float()`` cannot convert is returned\n",
"    through ``clean_value`` (escaped text, without ``prefix``/``suffix``).\n",
"    \"\"\"\n",
"    if pd.isna(value):\n",
"        return ''\n",
"    try:\n",
"        number = float(value)\n",
"    except (TypeError, ValueError):\n",
"        return clean_value(value)\n",
"    digits = format(number, f',.{decimals}f')\n",
"    return f'{prefix}{digits}{suffix}'\n",
|
|
"\n",
|
|
"\n",
|
"def fmt_pct_ratio(value, decimals=1):\n",
"    \"\"\"Format a ratio as a percentage string: the value is multiplied by 100 and suffixed with '%'.\n",
"\n",
"    Missing values give ''; values ``float()`` cannot convert are returned\n",
"    through ``clean_value``.\n",
"    \"\"\"\n",
"    if pd.isna(value):\n",
"        return ''\n",
"    try:\n",
"        percent = 100.0 * float(value)\n",
"    except (TypeError, ValueError):\n",
"        return clean_value(value)\n",
"    return fmt_number(percent, decimals, suffix='%')\n",
|
|
"\n",
|
|
"\n",
|
"def cluster_color(cluster_id):\n",
"    \"\"\"Return the marker color for a cluster id.\n",
"\n",
"    Noise (-1) gets ``NOISE_COLOR``. Other ids are colored by their\n",
"    ``cluster_rank`` from ``cluster_info``, cycling through ``CLUSTER_COLORS``;\n",
"    ids missing from ``cluster_info`` use ``cluster_id + 1`` as the rank.\n",
"    \"\"\"\n",
"    if cluster_id == -1:\n",
"        return NOISE_COLOR\n",
"    fallback_rank = cluster_id + 1\n",
"    rank = int(cluster_info.get(cluster_id, {}).get('cluster_rank', fallback_rank))\n",
"    palette_index = (rank - 1) % len(CLUSTER_COLORS)\n",
"    return CLUSTER_COLORS[palette_index]\n",
|
|
"\n",
|
|
"\n",
|
"def cluster_label_and_size(cluster_id):\n",
"    \"\"\"Return ``(label, size_text, rank_text)`` for a cluster id.\n",
"\n",
"    Despite the name, three strings are returned. Noise (-1) yields a fixed\n",
"    label, size '1' and an empty rank text; other ids read ``point_count`` and\n",
"    ``cluster_rank`` from ``cluster_info`` (defaults: 0 and ``cluster_id + 1``).\n",
"    \"\"\"\n",
"    if cluster_id == -1:\n",
"        return 'Noise / isolated', '1', ''\n",
"    details = cluster_info.get(cluster_id, {})\n",
"    rank = int(details.get('cluster_rank', cluster_id + 1))\n",
"    size = int(details.get('point_count', 0))\n",
"    label = f'Cluster ID {cluster_id}'\n",
"    size_text = f'{size:,}'\n",
"    rank_text = f'Rank {rank} of {n_clusters} by size'\n",
"    return label, size_text, rank_text\n",
|
|
"\n",
|
|
"\n",
|
"def climate_color(mean_summer_c):\n",
"    \"\"\"Color ramp for mean summer temperature (degrees Celsius, per the parameter name).\n",
"\n",
"    Missing values are grey. The value is coerced with ``float()`` first, as the\n",
"    other ``*_color`` helpers do, so numeric strings are bucketed instead of\n",
"    raising ``TypeError`` on comparison. Buckets: >= 32, >= 29, >= 26, >= 23,\n",
"    and everything cooler.\n",
"    \"\"\"\n",
"    if pd.isna(mean_summer_c):\n",
"        return '#94a3b8'\n",
"    temp_c = float(mean_summer_c)\n",
"    if temp_c >= 32:\n",
"        return '#7f1d1d'\n",
"    if temp_c >= 29:\n",
"        return '#b91c1c'\n",
"    if temp_c >= 26:\n",
"        return '#ea580c'\n",
"    if temp_c >= 23:\n",
"        return '#f59e0b'\n",
"    return '#0284c7'\n",
|
|
"\n",
|
|
"\n",
|
"def broadband_color(provider_count):\n",
"    \"\"\"Color ramp for a provider count: more providers map to a darker green.\n",
"\n",
"    Missing values are grey; counts below 2 get the final fallback color.\n",
"    \"\"\"\n",
"    if pd.isna(provider_count):\n",
"        return '#94a3b8'\n",
"    count = float(provider_count)\n",
"    # (inclusive lower bound, color), checked from the highest bound down.\n",
"    ramp = ((20, '#166534'), (10, '#16a34a'), (5, '#65a30d'), (2, '#ca8a04'))\n",
"    for lower_bound, color in ramp:\n",
"        if count >= lower_bound:\n",
"            return color\n",
"    return '#b45309'\n",
|
|
"\n",
|
|
"\n",
|
|
"def election_color(margin_pct):\n",
"    \"\"\"Map a signed margin (percentage points) to a colour.\n",
"\n",
"    Margins of +5 or more use reds, -5 or less use blues, anything in between is\n",
"    neutral grey; a missing margin gets the lighter 'no data' grey.\n",
"    \"\"\"\n",
"    if pd.isna(margin_pct):\n",
"        return '#94a3b8'\n",
"    margin = float(margin_pct)\n",
"    if margin >= 5:\n",
"        return '#7f1d1d' if margin >= 20 else '#dc2626'\n",
"    if margin <= -5:\n",
"        return '#1e3a8a' if margin <= -20 else '#2563eb'\n",
"    return '#6b7280'\n",
|
|
"\n",
|
|
"\n",
|
|
"def nri_color(risk_score):\n",
"    \"\"\"Colour ramp for the FEMA NRI composite RISK_SCORE (0-100, higher = more risk).\n",
"\n",
"    A missing score is drawn in grey.\n",
"    \"\"\"\n",
"    if pd.isna(risk_score):\n",
"        return '#94a3b8'\n",
"    score = float(risk_score)\n",
"    palette = ('#0284c7', '#ca8a04', '#ea580c', '#dc2626', '#7f1d1d')\n",
"    # The number of cut-offs reached is the index into the five-step palette.\n",
"    return palette[sum(score >= cutoff for cutoff in (20, 40, 60, 80))]\n",
|
|
"\n",
|
|
"\n",
|
|
"def generator_capacity_color(source_code):\n",
"    \"\"\"Look up the marker colour for an energy source code, ignoring case.\n",
"\n",
"    Codes that are not in the table fall back to GENERATOR_CAPACITY_COLOR.\n",
"    \"\"\"\n",
"    palette = {\n",
"        'NG': '#f97316',   # natural gas\n",
"        'SUN': '#facc15',  # solar\n",
"        'WND': '#16a34a',  # wind\n",
"        'WAT': '#0284c7',  # hydro\n",
"        'NUC': '#7c3aed',  # nuclear\n",
"        'BIT': '#111827',\n",
"        'SUB': '#374151',\n",
"        'LIG': '#4b5563',\n",
"        'DFO': '#b45309',\n",
"        'RFO': '#92400e',\n",
"    }\n",
"    key = clean_value(source_code).upper()\n",
"    return palette.get(key, GENERATOR_CAPACITY_COLOR)\n",
|
|
"\n",
|
|
"\n",
|
|
"def utility_rate_color(avg_pct_ratio):\n",
"    \"\"\"Map an average rate increase, given as a ratio, to a ramp colour.\n",
"\n",
"    The ratio is scaled to percent before comparison; missing values are grey.\n",
"    \"\"\"\n",
"    if pd.isna(avg_pct_ratio):\n",
"        return '#94a3b8'\n",
"    pct = float(avg_pct_ratio) * 100.0\n",
"    for floor, shade in ((15, '#7f1d1d'), (10, '#dc2626'), (5, '#f59e0b')):\n",
"        if pct >= floor:\n",
"            return shade\n",
"    return '#0284c7'\n",
|
|
"\n",
|
|
"\n",
|
|
"def top_nri_hazards(row, n=3):\n",
"    \"\"\"Return up to ``n`` 'Label: score' strings for the row's highest hazard risk scores.\n",
"\n",
"    Scores are read from ``nri_<prefix>_risk`` attributes for each entry of\n",
"    NRI_HAZARDS; absent or missing scores are skipped.\n",
"    \"\"\"\n",
"    candidates = ((label, getattr(row, f'nri_{prefix}_risk', None)) for prefix, label in NRI_HAZARDS)\n",
"    scored = [(label, float(raw)) for label, raw in candidates if raw is not None and pd.notna(raw)]\n",
"    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)\n",
"    return [f'{label}: {score:.1f}' for label, score in ranked[:n]]\n",
|
|
"\n",
|
|
"\n",
|
|
"def point_popup(row):\n",
"    \"\"\"Build the folium popup for a single data-center point.\n",
"\n",
"    Each context section (watershed, RUCA, energy, utility rates, drought, smoke,\n",
"    climate, broadband, election, NRI) is rendered only when its gating attribute\n",
"    exists on ``row`` and is not missing; otherwise the section stays ''.\n",
"    Text fields pass through ``clean_value`` so they are HTML-escaped.\n",
"    \"\"\"\n",
"    cluster_label, cluster_size, cluster_rank = cluster_label_and_size(row.cluster_id)\n",
"    nearest = row.nearest_neighbor_km\n",
"    nearest_text = f'{nearest:.2f} km' if pd.notna(nearest) else ''\n",
"    title = clean_value(row.name) or clean_value(row.master_id)\n",
"\n",
"    huc8_lines = ''\n",
"    if hasattr(row, 'huc8') and pd.notna(row.huc8):\n",
"        huc8_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Watershed</strong><br>\n",
"        HUC8: {clean_value(row.huc8)}<br>\n",
"        Name: {clean_value(row.huc8_name)}<br>\n",
"        States: {clean_value(row.huc8_states)}<br>\n",
"        '''\n",
"\n",
"    ruca_lines = ''\n",
"    if hasattr(row, 'primary_ruca') and pd.notna(row.primary_ruca):\n",
"        ruca_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>RUCA / tract context</strong><br>\n",
"        RUCA band: {clean_value(row.ruca_band)}<br>\n",
"        RUCA code: {fmt_number(row.primary_ruca)}<br>\n",
"        {clean_value(row.primary_ruca_description)}<br>\n",
"        Median HH income: {fmt_number(row.median_household_income, prefix='$')}<br>\n",
"        Bachelor's+: {fmt_number(row.bachelor_or_higher_pct, 1, suffix='%')}<br>\n",
"        Poverty: {fmt_number(row.poverty_rate, 1, suffix='%')}<br>\n",
"        Non-Hispanic white: {fmt_number(row.non_hispanic_white_pct, 1, suffix='%')}<br>\n",
"        '''\n",
"\n",
"    energy_lines = ''\n",
"    if hasattr(row, 'im3_projected_it_power_mw') and pd.notna(row.im3_projected_it_power_mw):\n",
"        if hasattr(row, 'seds_series_count') and pd.notna(row.seds_series_count):\n",
"            # A year is a label, not a quantity: strip the thousands separator fmt_number inserts.\n",
"            seds_year = fmt_number(row.seds_latest_year).replace(',', '')\n",
"            seds_note = f\"SEDS year: {seds_year}; series: {fmt_number(row.seds_series_count)}<br>\"\n",
"        else:\n",
"            seds_note = 'SEDS context: unavailable in seds_state_msn_year<br>'\n",
"        energy_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>State energy demand context</strong><br>\n",
"        IM3 projected IT power: {fmt_number(row.im3_projected_it_power_mw, suffix=' MW')}<br>\n",
"        IM3 cooling water demand: {fmt_number(row.im3_cooling_water_demand_mgy, 1, suffix=' MGY')}<br>\n",
"        IM3 water consumption: {fmt_number(row.im3_cooling_water_consumption_mgy, 1, suffix=' MGY')}<br>\n",
"        IM3 avg siting score: {fmt_number(row.im3_avg_weighted_siting_score, 3)}<br>\n",
"        {seds_note}\n",
"        '''\n",
"\n",
"    utility_rate_lines = ''\n",
"    if hasattr(row, 'utility_rate_case_count') and pd.notna(row.utility_rate_case_count):\n",
"        utility_rate_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Utility rate tracker (2025-2028)</strong><br>\n",
"        State: {clean_value(row.utility_state_name)}<br>\n",
"        Tracker cases: {fmt_number(row.utility_rate_case_count)}<br>\n",
"        Utilities: {fmt_number(row.utility_rate_provider_count)}<br>\n",
"        Electric / gas cases: {fmt_number(row.utility_rate_electric_case_count)} / {fmt_number(row.utility_rate_gas_case_count)}<br>\n",
"        Total revenue increase: {fmt_number(row.utility_rate_total_revenue_increase_2025_2028, 0, prefix='$')}<br>\n",
"        Avg monthly increase: {fmt_number(row.utility_rate_avg_monthly_increase_amount, 2, prefix='$')}<br>\n",
"        Avg monthly % increase: {fmt_pct_ratio(row.utility_rate_avg_monthly_pct_increase_ratio, 1)}<br>\n",
"        Effective dates: {clean_value(row.utility_rate_first_effective_date)} to {clean_value(row.utility_rate_last_effective_date)}<br>\n",
"        Top utilities: {clean_value(row.utility_rate_top_utilities)}<br>\n",
"        '''\n",
"\n",
"    drought_lines = ''\n",
"    if hasattr(row, 'usdm_status') and pd.notna(row.usdm_status):\n",
"        drought_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Drought context (USDM)</strong><br>\n",
"        Status: {clean_value(row.usdm_status)}<br>\n",
"        Worst DM category: {fmt_number(row.worst_dm_category)}<br>\n",
"        Mean DM category: {fmt_number(row.mean_dm_category, 2)}<br>\n",
"        % weeks D2+: {fmt_number(row.pct_weeks_in_d2_or_worse, 1, suffix='%')}<br>\n",
"        % weeks D3+: {fmt_number(row.pct_weeks_in_d3_or_worse, 1, suffix='%')}<br>\n",
"        Longest D2 streak: {fmt_number(row.longest_d2_streak_weeks)} weeks<br>\n",
"        Longest D3 streak: {fmt_number(row.longest_d3_streak_weeks)} weeks<br>\n",
"        '''\n",
"\n",
"    smoke_lines = ''\n",
"    if hasattr(row, 'hms_status') and pd.notna(row.hms_status):\n",
"        smoke_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Wildfire smoke context (HMS)</strong><br>\n",
"        Status: {clean_value(row.hms_status)}<br>\n",
"        Observed days: {fmt_number(row.days_observed)}<br>\n",
"        Any-smoke days: {fmt_number(row.days_with_any_smoke)} ({fmt_number(row.pct_days_with_any_smoke, 1, suffix='%')})<br>\n",
"        Heavy-smoke days: {fmt_number(row.days_with_heavy_smoke)} ({fmt_number(row.pct_days_with_heavy_smoke, 1, suffix='%')})<br>\n",
"        Worst density class: {clean_value(row.worst_density)}<br>\n",
"        Mean density rank: {fmt_number(row.mean_density_rank, 2)}<br>\n",
"        '''\n",
"\n",
"    climate_lines = ''\n",
"    if hasattr(row, 'mean_summer_temperature_c') and pd.notna(row.mean_summer_temperature_c):\n",
"        climate_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Climate context</strong><br>\n",
"        Mean annual temp: {fmt_number(row.mean_annual_temperature_c, 1, suffix=' C')}<br>\n",
"        Mean summer temp: {fmt_number(row.mean_summer_temperature_c, 1, suffix=' C')}<br>\n",
"        Max wet-bulb temp: {fmt_number(row.max_wet_bulb_temperature_c, 1, suffix=' C')}<br>\n",
"        Extreme heat days: {fmt_number(row.extreme_heat_days)}<br>\n",
"        Annual CDD mean: {fmt_number(row.annual_cooling_degree_days_c_mean, 0)}<br>\n",
"        Annual precip mean: {fmt_number(row.annual_precipitation_mm_mean, 0, suffix=' mm')}<br>\n",
"        '''\n",
"\n",
"    broadband_lines = ''\n",
"    if hasattr(row, 'fcc_bdc_status') and pd.notna(row.fcc_bdc_status):\n",
"        broadband_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Broadband context</strong><br>\n",
"        FCC BDC status: {clean_value(row.fcc_bdc_status)}<br>\n",
"        FCC as-of date: {clean_value(row.fcc_bdc_as_of_date)}<br>\n",
"        Census broadband subscription: {fmt_number(row.census_broadband_subscription_pct, 1, suffix='%')}<br>\n",
"        Provider count: {fmt_number(row.fcc_provider_count)}<br>\n",
"        Fiber providers: {fmt_number(row.fcc_fiber_provider_count)}<br>\n",
"        Cable providers: {fmt_number(row.fcc_cable_provider_count)}<br>\n",
"        Fixed wireless providers: {fmt_number(row.fcc_fixed_wireless_provider_count)}<br>\n",
"        Max advertised down/up: {fmt_number(row.fcc_max_advertised_download_mbps, 0, suffix=' /')} {fmt_number(row.fcc_max_advertised_upload_mbps, 0, suffix=' Mbps')}<br>\n",
"        Providers >=100/20: {fmt_number(row.fcc_100_20_provider_count)}<br>\n",
"        '''\n",
"\n",
"    election_lines = ''\n",
"    has_2020 = hasattr(row, 'election_2020_dem_share_pct') and pd.notna(row.election_2020_dem_share_pct)\n",
"    # The whole election section is shown only when 2020 results are present.\n",
"    if has_2020:\n",
"        election_2020_lines = f'''\n",
"            <strong>2020</strong><br>\n",
"            Dem share: {fmt_number(row.election_2020_dem_share_pct, 1, suffix='%')}<br>\n",
"            Rep share: {fmt_number(row.election_2020_rep_share_pct, 1, suffix='%')}<br>\n",
"            Rep margin: {fmt_number(row.election_2020_rep_margin_pct, 1, suffix=' pp')}<br>\n",
"            Dem / Rep votes: {fmt_number(row.election_2020_dem_votes)} / {fmt_number(row.election_2020_rep_votes)}<br>\n",
"            Two-party votes: {fmt_number(row.election_2020_total_votes)}<br>\n",
"            '''\n",
"        election_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>Election context (precinct match)</strong><br>\n",
"        State: {clean_value(row.election_state_code)}<br>\n",
"        Locality: {clean_value(row.election_locality)}<br>\n",
"        Precinct: {clean_value(row.election_precinct)}<br>\n",
"        Join method: {clean_value(row.election_join_method)}<br>\n",
"        Match distance (m): {fmt_number(row.election_match_distance_m, 0)}<br>\n",
"        {election_2020_lines}\n",
"        '''\n",
"\n",
"    nri_lines = ''\n",
"    if hasattr(row, 'nri_status') and pd.notna(row.nri_status):\n",
"        top3 = top_nri_hazards(row, n=3)\n",
"        top3_html = ('<br>'.join(top3)) if top3 else 'n/a'\n",
"        nri_lines = f'''\n",
"        <hr style=\"margin: 6px 0;\">\n",
"        <strong>FEMA NRI context (Dec 2025)</strong><br>\n",
"        Status: {clean_value(row.nri_status)}<br>\n",
"        Census tract: {clean_value(row.nri_tractfips)}<br>\n",
"        Composite RISK: {fmt_number(row.nri_risk_score, 1)} ({clean_value(row.nri_risk_rating)})<br>\n",
"        EAL score: {fmt_number(row.nri_eal_score, 1)} ({clean_value(row.nri_eal_rating)})<br>\n",
"        EAL total $/yr: {fmt_number(row.nri_eal_total_usd, 0, prefix='$')}<br>\n",
"        SOVI (social vuln): {fmt_number(row.nri_sovi_score, 1)} ({clean_value(row.nri_sovi_rating)})<br>\n",
"        RESL (resilience): {fmt_number(row.nri_resl_score, 1)} ({clean_value(row.nri_resl_rating)})<br>\n",
"        Top hazards (risk score):<br>\n",
"        {top3_html}<br>\n",
"        '''\n",
"\n",
"    return folium.Popup(f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 310px; max-width: 420px;\">\n",
"      <strong>{title}</strong><br>\n",
"      {clean_value(row.city)}, {clean_value(row.state)}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      <strong>{cluster_label}</strong><br>\n",
"      {cluster_rank}<br>\n",
"      Cluster size: {cluster_size} data center(s)<br>\n",
"      Source: {clean_value(row.source)}<br>\n",
"      Operator: {clean_value(row.operator)}<br>\n",
"      Nearest neighbor: {nearest_text}<br>\n",
"      Master ID: {clean_value(row.master_id)}\n",
"      {huc8_lines}\n",
"      {ruca_lines}\n",
"      {energy_lines}\n",
"      {utility_rate_lines}\n",
"      {drought_lines}\n",
"      {smoke_lines}\n",
"      {climate_lines}\n",
"      {broadband_lines}\n",
"      {election_lines}\n",
"      {nri_lines}\n",
"    </div>\n",
"    ''', max_width=460)\n",
|
|
"\n",
|
|
"\n",
|
|
"def centroid_popup(row):\n",
"    \"\"\"Build the folium popup summarising one cluster row (size, radii, places, operators).\"\"\"\n",
"    html = f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
"      <strong>Cluster ID {int(row.cluster_id)}</strong><br>\n",
"      Rank {int(row.cluster_rank)} of {n_clusters} by size<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      Points: {int(row.point_count):,}<br>\n",
"      p50 radius: {row.radius_km_p50:.1f} km<br>\n",
"      p90 radius: {row.radius_km_p90:.1f} km<br>\n",
"      Max radius: {row.radius_km_max:.1f} km<br>\n",
"      States: {clean_value(row.states)}<br>\n",
"      Cities: {clean_value(row.cities)}<br>\n",
"      Operators: {clean_value(row.operators)}\n",
"    </div>\n",
"    '''\n",
"    return folium.Popup(html, max_width=420)\n",
|
|
"\n",
|
|
"\n",
|
|
"def huc8_style(feature):\n",
"    \"\"\"Return the style dict for a HUC8 polygon, shaded by its ``data_center_count``.\n",
"\n",
"    A missing or falsy count is treated as 0 and gets the lightest fill.\n",
"    \"\"\"\n",
"    n_sites = feature['properties'].get('data_center_count') or 0\n",
"    shades = ((100, '#075985'), (50, '#0284c7'), (20, '#38bdf8'), (10, '#7dd3fc'))\n",
"    fill = next((shade for floor, shade in shades if n_sites >= floor), '#bae6fd')\n",
"    return {'fillColor': fill, 'color': '#0369a1', 'weight': 1, 'fillOpacity': 0.22}\n",
|
|
"\n",
|
|
"\n",
|
|
"def huc8_popup(feature):\n",
"    \"\"\"Build the folium popup for a HUC8 GeoJSON feature from its ``properties``.\"\"\"\n",
"    p = feature['properties']\n",
"    html = f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
"      <strong>{clean_value(p.get('name'))}</strong><br>\n",
"      HUC8: {clean_value(p.get('huc8'))}<br>\n",
"      States: {clean_value(p.get('states'))}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      Data centers: {fmt_number(p.get('data_center_count'))}<br>\n",
"      Clustered DCs: {fmt_number(p.get('clustered_data_center_count'))}<br>\n",
"      Distinct clusters: {fmt_number(p.get('cluster_count'))}<br>\n",
"      Area: {fmt_number(p.get('areasqkm'), 0, suffix=' sq km')}\n",
"    </div>\n",
"    '''\n",
"    return folium.Popup(html, max_width=360)\n",
|
|
"\n",
|
|
"\n",
|
|
"def state_energy_popup(row):\n",
"    \"\"\"Build the folium popup with IM3 (and, when present, SEDS) context for one state row.\n",
"\n",
"    The SEDS line falls back to an 'unavailable' note when ``seds_series_count``\n",
"    is absent from ``row`` or missing.\n",
"    \"\"\"\n",
"    if hasattr(row, 'seds_series_count') and pd.notna(row.seds_series_count):\n",
"        # A year is a label, not a quantity: strip the thousands separator fmt_number inserts.\n",
"        seds_year = fmt_number(row.seds_latest_year).replace(',', '')\n",
"        seds_note = f\"SEDS latest year: {seds_year}; series: {fmt_number(row.seds_series_count)}\"\n",
"    else:\n",
"        seds_note = 'SEDS context: unavailable in seds_state_msn_year'\n",
"    return folium.Popup(f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
"      <strong>{clean_value(row.state_code)} state energy context</strong><br>\n",
"      Current data centers: {fmt_number(row.current_data_center_count)}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      IM3 projected sites: {fmt_number(row.im3_project_count)}<br>\n",
"      IM3 projected IT power: {fmt_number(row.im3_projected_it_power_mw, suffix=' MW')}<br>\n",
"      IM3 cooling water demand: {fmt_number(row.im3_cooling_water_demand_mgy, 1, suffix=' MGY')}<br>\n",
"      IM3 water consumption: {fmt_number(row.im3_cooling_water_consumption_mgy, 1, suffix=' MGY')}<br>\n",
"      IM3 avg siting score: {fmt_number(row.im3_avg_weighted_siting_score, 3)}<br>\n",
"      {seds_note}\n",
"    </div>\n",
"    ''', max_width=380)\n",
|
|
"\n",
|
|
"\n",
|
|
"def generator_capacity_popup(row):\n",
"    \"\"\"Build the folium popup for one EIA plant row (capacity, source mix, entity).\n",
"\n",
"    The primary source shows the description when available, else the code.\n",
"    \"\"\"\n",
"    primary = clean_value(row.primary_energy_source_desc) or clean_value(row.primary_energy_source_code)\n",
"    html = f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 300px; max-width: 430px;\">\n",
"      <strong>{clean_value(row.plant_name) or 'EIA generating plant'}</strong><br>\n",
"      Plant ID: {clean_value(row.plant_id)}<br>\n",
"      State: {clean_value(row.state_id)} ({clean_value(row.state_name)})<br>\n",
"      EIA period: {clean_value(row.period)}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      Nameplate capacity: {fmt_number(row.nameplate_capacity_mw, 1, suffix=' MW')}<br>\n",
"      Net summer capacity: {fmt_number(row.net_summer_capacity_mw, 1, suffix=' MW')}<br>\n",
"      Net winter capacity: {fmt_number(row.net_winter_capacity_mw, 1, suffix=' MW')}<br>\n",
"      Generators: {fmt_number(row.generator_count)}<br>\n",
"      Primary source: {primary}<br>\n",
"      Source mix: {clean_value(row.energy_source_mix)}<br>\n",
"      Balancing authority: {clean_value(row.balancing_authority_code)} {clean_value(row.balancing_authority_name)}<br>\n",
"      Entity: {clean_value(row.entity_names)}\n",
"    </div>\n",
"    '''\n",
"    return folium.Popup(html, max_width=460)\n",
|
|
"\n",
|
|
"\n",
|
|
"def utility_rate_popup(row):\n",
"    \"\"\"Build the folium popup summarising utility rate tracker figures for one state row.\"\"\"\n",
"    html = f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 300px; max-width: 430px;\">\n",
"      <strong>{clean_value(row.utility_state_name)} utility rate tracker</strong><br>\n",
"      State code: {clean_value(row.utility_state_code)}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      Tracker cases: {fmt_number(row.utility_rate_case_count)}<br>\n",
"      Utility providers: {fmt_number(row.utility_rate_provider_count)}<br>\n",
"      Electric / gas cases: {fmt_number(row.utility_rate_electric_case_count)} / {fmt_number(row.utility_rate_gas_case_count)}<br>\n",
"      Customers represented: {fmt_number(row.utility_rate_customer_count)}<br>\n",
"      Total revenue increase: {fmt_number(row.utility_rate_total_revenue_increase_2025_2028, 0, prefix='$')}<br>\n",
"      Avg monthly increase: {fmt_number(row.utility_rate_avg_monthly_increase_amount, 2, prefix='$')}<br>\n",
"      Avg monthly % increase: {fmt_pct_ratio(row.utility_rate_avg_monthly_pct_increase_ratio, 1)}<br>\n",
"      Effective dates: {clean_value(row.utility_rate_first_effective_date)} to {clean_value(row.utility_rate_last_effective_date)}<br>\n",
"      Top utilities: {clean_value(row.utility_rate_top_utilities)}\n",
"    </div>\n",
"    '''\n",
"    return folium.Popup(html, max_width=460)\n",
|
|
"\n",
|
|
"\n",
|
|
"def cable_style(_feature):\n",
"    \"\"\"Return the fixed line style used for every cable feature; the feature is ignored.\"\"\"\n",
"    return dict(color=INTERNET_CABLE_COLOR, weight=1.6, opacity=0.45)\n",
|
|
"\n",
|
|
"\n",
|
|
"def cable_popup(feature):\n",
"    \"\"\"Build the folium popup for an internet-cable GeoJSON feature.\n",
"\n",
"    Tolerates a feature whose ``properties`` member is absent or null, and shows\n",
"    year fields without thousands separators.\n",
"    \"\"\"\n",
"    # GeoJSON permits \"properties\": null, which .get('properties', {}) would return as None.\n",
"    p = feature.get('properties') or {}\n",
"    # Years are labels, not quantities: strip the separators fmt_number inserts (2,001 -> 2001).\n",
"    rfs_year = fmt_number(p.get('rfs_year')).replace(',', '')\n",
"    decommission_year = fmt_number(p.get('decommission_year')).replace(',', '')\n",
"    return folium.Popup(f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
"      <strong>{clean_value(p.get('name') or 'Internet cable')}</strong><br>\n",
"      Owners: {clean_value(p.get('owners'))}<br>\n",
"      Type: {clean_value(p.get('cable_type'))}<br>\n",
"      RFS year: {rfs_year}<br>\n",
"      Decommission year: {decommission_year}<br>\n",
"      Length: {fmt_number(p.get('length_km'), 0, suffix=' km')}<br>\n",
"      Feature ID: {clean_value(p.get('feature_id'))}\n",
"    </div>\n",
"    ''', max_width=380)\n",
|
|
"\n",
|
|
"\n",
|
|
"def opposition_popup(row):\n",
"    \"\"\"Build the folium popup describing one opposition-case row.\n",
"\n",
"    The case id is shown without thousands separators because it is an\n",
"    identifier rather than a quantity.\n",
"    \"\"\"\n",
"    case_id = fmt_number(row.id).replace(',', '')\n",
"    return folium.Popup(f'''\n",
"    <div style=\"font-family: system-ui, sans-serif; min-width: 280px;\">\n",
"      <strong>Opposition case {case_id}</strong><br>\n",
"      Location: {clean_value(row.location)}<br>\n",
"      State: {clean_value(row.state)}<br>\n",
"      <hr style=\"margin: 6px 0;\">\n",
"      Status: {clean_value(row.status)}<br>\n",
"      Developer: {clean_value(row.developer)}<br>\n",
"      Investment: {fmt_number(row.investment_billion, 2, prefix='$', suffix='B')}<br>\n",
"      Opposition type: {clean_value(row.opposition_type)}<br>\n",
"      Commons type: {clean_value(row.commons_type)}<br>\n",
"      Governance response: {clean_value(row.governance_response)}<br>\n",
"      Outcome: {clean_value(row.outcome)}<br>\n",
"      Source: {clean_value(row.data_source)}\n",
"    </div>\n",
"    ''', max_width=400)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "10",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def add_overlay_legend(map_obj: folium.Map) -> None:\n",
"    \"\"\"Attach a fixed-position HTML legend for the overlay colour ramps to ``map_obj``.\n",
"\n",
"    The legend is static markup appended to the map's root HTML. A literal\n",
"    less-than sign in legend text is written as ``&lt;`` so the markup stays valid.\n",
"    \"\"\"\n",
"    legend_html = \"\"\"\n",
"    <div style=\"\n",
"        position: fixed;\n",
"        bottom: 30px;\n",
"        left: 30px;\n",
"        z-index: 9999;\n",
"        background: rgba(255, 255, 255, 0.96);\n",
"        border: 1px solid #d1d5db;\n",
"        border-radius: 8px;\n",
"        box-shadow: 0 2px 8px rgba(0,0,0,0.15);\n",
"        padding: 10px 12px;\n",
"        font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif;\n",
"        font-size: 12px;\n",
"        line-height: 1.35;\n",
"        min-width: 280px;\n",
"        max-height: 76vh;\n",
"        overflow-y: auto;\n",
"    \">\n",
"      <div style=\"font-weight: 700; margin-bottom: 6px;\">Overlay Legend</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 4px;\">Climate (mean summer temperature)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#0284c7;margin-right:6px;\"></span>&lt; 23 C</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#f59e0b;margin-right:6px;\"></span>23-25.9 C</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#ea580c;margin-right:6px;\"></span>26-28.9 C</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#b91c1c;margin-right:6px;\"></span>29-31.9 C</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>>= 32 C</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 6px;\">Broadband (FCC provider count)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#b45309;margin-right:6px;\"></span>0-1</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#ca8a04;margin-right:6px;\"></span>2-4</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#65a30d;margin-right:6px;\"></span>5-9</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#16a34a;margin-right:6px;\"></span>10-19</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#166534;margin-right:6px;\"></span>>= 20</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 6px;\">Election (Trump margin, pp)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#1e3a8a;margin-right:6px;\"></span>&lt;= -20</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#2563eb;margin-right:6px;\"></span>-19.9 to -5</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#6b7280;margin-right:6px;\"></span>-4.9 to 4.9</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#dc2626;margin-right:6px;\"></span>5 to 19.9</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>>= 20</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 6px;\">FEMA NRI (composite RISK_SCORE)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#0284c7;margin-right:6px;\"></span>&lt; 20 (very low)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#ca8a04;margin-right:6px;\"></span>20-39 (rel. low)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#ea580c;margin-right:6px;\"></span>40-59 (rel. moderate)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#dc2626;margin-right:6px;\"></span>60-79 (rel. high)</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>>= 80 (very high)</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 6px;\">EIA generator capacity</div>\n",
"      <div>Circle size = latest-period plant nameplate MW</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#f97316;margin-right:6px;\"></span>Natural gas</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#facc15;margin-right:6px;\"></span>Solar</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#16a34a;margin-right:6px;\"></span>Wind</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#0284c7;margin-right:6px;\"></span>Hydro</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#7c3aed;margin-right:6px;\"></span>Nuclear</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#374151;margin-right:6px;\"></span>Coal / other</div>\n",
"\n",
"      <div style=\"font-weight: 600; margin-top: 6px;\">Utility rate tracker</div>\n",
"      <div>Circle size = total 2025-2028 revenue increase</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#0284c7;margin-right:6px;\"></span>&lt; 5% avg monthly increase</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#f59e0b;margin-right:6px;\"></span>5-9.9%</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#dc2626;margin-right:6px;\"></span>10-14.9%</div>\n",
"      <div><span style=\"display:inline-block;width:10px;height:10px;background:#7f1d1d;margin-right:6px;\"></span>>= 15%</div>\n",
"    </div>\n",
"    \"\"\"\n",
"    map_obj.get_root().html.add_child(folium.Element(legend_html))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "11",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Build The Map"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "12",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def build_cluster_map(points_df: pd.DataFrame, clusters_df: pd.DataFrame) -> folium.Map:\n",
|
|
" m = folium.Map(location=MAP_CENTER, zoom_start=MAP_ZOOM, tiles=BASE_TILES, control_scale=True)\n",
|
|
" plugins.Fullscreen(position='topleft').add_to(m)\n",
|
|
" plugins.MeasureControl(position='topleft', primary_length_unit='kilometers').add_to(m)\n",
|
|
" plugins.MiniMap(toggle_display=True, minimized=True).add_to(m)\n",
|
|
"\n",
|
|
" huc8_layer = folium.FeatureGroup(name='HUC8 watersheds with data centers', show=False)\n",
|
|
" state_energy_layer = folium.FeatureGroup(name='State energy demand context (IM3 / SEDS)', show=False)\n",
|
|
" cables_layer = folium.FeatureGroup(name='Internet cable network', show=False)\n",
|
|
" opposition_layer = folium.FeatureGroup(name='Opposition cases', show=False)\n",
|
|
" generator_capacity_layer = folium.FeatureGroup(name='EIA operating generator capacity (latest)', show=False)\n",
|
|
" utility_rate_layer = folium.FeatureGroup(name='Utility rate tracker (2025-2028)', show=False)\n",
|
|
" climate_layer = folium.FeatureGroup(name='Climate stress context', show=False)\n",
|
|
" broadband_layer = folium.FeatureGroup(name='Broadband capacity context', show=False)\n",
|
|
" election_2020_layer = folium.FeatureGroup(name='Election context (2020 precinct match)', show=False)\n",
|
|
" nri_layer = folium.FeatureGroup(name='FEMA NRI multi-hazard risk', show=False)\n",
|
|
" clustered_layer = folium.FeatureGroup(name='Data centers: clustered', show=True)\n",
|
|
" noise_layer = folium.FeatureGroup(name='Data centers: noise / isolated', show=True)\n",
|
|
" centroid_layer = folium.FeatureGroup(name='Cluster centroids and p90 radius', show=True)\n",
|
|
"\n",
|
|
" if SHOW_HUC8_LAYER and huc8_geojson is not None:\n",
|
|
" folium.GeoJson(\n",
|
|
" huc8_geojson,\n",
|
|
" name='HUC8 watersheds with data centers',\n",
|
|
" style_function=huc8_style,\n",
|
|
" highlight_function=lambda feature: {'weight': 3, 'fillOpacity': 0.35},\n",
|
|
" tooltip=folium.GeoJsonTooltip(\n",
|
|
" fields=['name', 'huc8', 'data_center_count', 'cluster_count'],\n",
|
|
" aliases=['HUC8', 'Code', 'Data centers', 'Clusters'],\n",
|
|
" localize=True,\n",
|
|
" sticky=False,\n",
|
|
" ),\n",
|
|
" popup=huc8_popup,\n",
|
|
" ).add_to(huc8_layer)\n",
|
|
"\n",
|
|
" if SHOW_STATE_ENERGY_LAYER and not state_energy.empty:\n",
|
|
" for row in state_energy.dropna(subset=['map_latitude', 'map_longitude']).itertuples(index=False):\n",
|
|
" power = getattr(row, 'im3_projected_it_power_mw')\n",
|
|
" radius = 6 if pd.isna(power) else max(6, min(28, 4 + float(power) ** 0.5 / 2.4))\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.map_latitude, row.map_longitude],\n",
|
|
" radius=radius,\n",
|
|
" color='#92400e',\n",
|
|
" fill=True,\n",
|
|
" fill_color=STATE_ENERGY_COLOR,\n",
|
|
" fill_opacity=0.55,\n",
|
|
" weight=1.5,\n",
|
|
" popup=state_energy_popup(row),\n",
|
|
" tooltip=f'{row.state_code}: IM3 {fmt_number(power, suffix=\" MW\")}',\n",
|
|
" ).add_to(state_energy_layer)\n",
|
|
"\n",
|
|
" if SHOW_INTERNET_CABLES_LAYER and internet_cables_geojson is not None:\n",
|
|
" folium.GeoJson(\n",
|
|
" internet_cables_geojson,\n",
|
|
" name='Internet cable network',\n",
|
|
" style_function=cable_style,\n",
|
|
" highlight_function=lambda _f: {'weight': 3.0, 'opacity': 0.85},\n",
|
|
" popup=cable_popup,\n",
|
|
" tooltip=folium.GeoJsonTooltip(\n",
|
|
" fields=['name', 'cable_type', 'rfs_year'],\n",
|
|
" aliases=['Cable', 'Type', 'RFS year'],\n",
|
|
" localize=True,\n",
|
|
" sticky=False,\n",
|
|
" ),\n",
|
|
" ).add_to(cables_layer)\n",
|
|
"\n",
|
|
" if SHOW_OPPOSITION_CASES_LAYER and not opposition_cases.empty:\n",
|
|
" for row in opposition_cases.itertuples(index=False):\n",
|
|
" marker_radius = 5 if pd.isna(row.investment_billion) else max(5, min(14, 4 + float(row.investment_billion) ** 0.5 * 2.2))\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.lat, row.lon],\n",
|
|
" radius=marker_radius,\n",
|
|
" color='#7f1d1d',\n",
|
|
" fill=True,\n",
|
|
" fill_color=OPPOSITION_CASE_COLOR,\n",
|
|
" fill_opacity=0.75,\n",
|
|
" weight=1.2,\n",
|
|
" popup=opposition_popup(row),\n",
|
|
" tooltip=f\"Opposition case: {row.state} ({clean_value(row.status)})\",\n",
|
|
" ).add_to(opposition_layer)\n",
|
|
"\n",
|
|
" if SHOW_EIA_GENERATOR_CAPACITY_LAYER and not generator_capacity_plants.empty:\n",
|
|
" gen_rows = generator_capacity_plants.dropna(subset=['latitude', 'longitude'])\n",
|
|
" for row in gen_rows.itertuples(index=False):\n",
|
|
" capacity = float(row.nameplate_capacity_mw) if pd.notna(row.nameplate_capacity_mw) else 0.0\n",
|
|
" radius = max(4, min(18, 3 + capacity ** 0.5 / 6.0))\n",
|
|
" color = generator_capacity_color(row.primary_energy_source_code)\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.latitude, row.longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.45,\n",
|
|
" weight=1,\n",
|
|
" popup=generator_capacity_popup(row),\n",
|
|
" tooltip=(\n",
|
|
" f\"EIA generator: {clean_value(row.plant_name)}; \"\n",
|
|
" f\"{fmt_number(row.nameplate_capacity_mw, 0, suffix=' MW')}\"\n",
|
|
" ),\n",
|
|
" ).add_to(generator_capacity_layer)\n",
|
|
"\n",
|
|
" if (\n",
|
|
" SHOW_UTILITY_RATE_TRACKER_LAYER\n",
|
|
" and not utility_rate_state_context.empty\n",
|
|
" and {'map_latitude', 'map_longitude'}.issubset(utility_rate_state_context.columns)\n",
|
|
" ):\n",
|
|
" rate_rows = utility_rate_state_context.dropna(subset=['map_latitude', 'map_longitude'])\n",
|
|
" for row in rate_rows.itertuples(index=False):\n",
|
|
" revenue_b = (\n",
|
|
" float(row.utility_rate_total_revenue_increase_2025_2028) / 1_000_000_000.0\n",
|
|
" if pd.notna(row.utility_rate_total_revenue_increase_2025_2028)\n",
|
|
" else 0.0\n",
|
|
" )\n",
|
|
" radius = max(6, min(26, 5 + revenue_b ** 0.5 * 4.0))\n",
|
|
" color = utility_rate_color(row.utility_rate_avg_monthly_pct_increase_ratio)\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.map_latitude, row.map_longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.48,\n",
|
|
" weight=1.2,\n",
|
|
" popup=utility_rate_popup(row),\n",
|
|
" tooltip=(\n",
|
|
" f\"Utility rates {row.utility_state_code}: \"\n",
|
|
" f\"{fmt_number(row.utility_rate_total_revenue_increase_2025_2028, 0, prefix='$')} total increase; \"\n",
|
|
" f\"avg monthly {fmt_pct_ratio(row.utility_rate_avg_monthly_pct_increase_ratio, 1)}\"\n",
|
|
" ),\n",
|
|
" ).add_to(utility_rate_layer)\n",
|
|
"\n",
|
|
" if SHOW_CLIMATE_LAYER:\n",
|
|
" climate_rows = points_df.dropna(subset=['mean_summer_temperature_c']) if 'mean_summer_temperature_c' in points_df.columns else pd.DataFrame()\n",
|
|
" for row in climate_rows.itertuples(index=False):\n",
|
|
" color = climate_color(row.mean_summer_temperature_c)\n",
|
|
" radius = max(4, min(12, 3 + (float(row.extreme_heat_days) if pd.notna(row.extreme_heat_days) else 0.0) ** 0.5 / 2.0))\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.latitude, row.longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.35,\n",
|
|
" weight=1,\n",
|
|
" tooltip=f\"Climate: summer {fmt_number(row.mean_summer_temperature_c, 1, suffix=' C')}; heat days {fmt_number(row.extreme_heat_days)}\",\n",
|
|
" ).add_to(climate_layer)\n",
|
|
"\n",
|
|
" if SHOW_BROADBAND_LAYER:\n",
|
|
" bb_rows = points_df.dropna(subset=['fcc_provider_count']) if 'fcc_provider_count' in points_df.columns else pd.DataFrame()\n",
|
|
" for row in bb_rows.itertuples(index=False):\n",
|
|
" color = broadband_color(row.fcc_provider_count)\n",
|
|
" speed = float(row.fcc_max_advertised_download_mbps) if pd.notna(row.fcc_max_advertised_download_mbps) else 0.0\n",
|
|
" radius = max(4, min(12, 4 + speed ** 0.5 / 10.0))\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.latitude, row.longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.3,\n",
|
|
" weight=1,\n",
|
|
" tooltip=f\"Broadband: providers {fmt_number(row.fcc_provider_count)}; max down {fmt_number(row.fcc_max_advertised_download_mbps, 0, suffix=' Mbps')}\",\n",
|
|
" ).add_to(broadband_layer)\n",
|
|
"\n",
|
|
" if SHOW_ELECTION_LAYER and SHOW_ELECTION_2020_LAYER and not election_context.empty:\n",
|
|
" e20_rows = election_context.dropna(subset=['election_latitude', 'election_longitude', 'election_2020_rep_margin_pct'])\n",
|
|
" for row in e20_rows.itertuples(index=False):\n",
|
|
" margin = getattr(row, 'election_2020_rep_margin_pct')\n",
|
|
" color = election_color(margin)\n",
|
|
" radius = max(4, min(11, 4 + abs(float(margin)) / 8.0)) if pd.notna(margin) else 5\n",
|
|
" tip = (\n",
|
|
" f\"Election 2020: {row.election_state_code} {clean_value(row.election_locality)}; \"\n",
|
|
" f\"Dem {fmt_number(row.election_2020_dem_share_pct, 1, suffix='%')} / \"\n",
|
|
" f\"Rep {fmt_number(row.election_2020_rep_share_pct, 1, suffix='%')}\"\n",
|
|
" )\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.election_latitude, row.election_longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.35,\n",
|
|
" weight=1,\n",
|
|
" tooltip=tip,\n",
|
|
" ).add_to(election_2020_layer)\n",
|
|
"\n",
|
|
" if SHOW_NRI_LAYER:\n",
|
|
" nri_rows = points_df.dropna(subset=['nri_risk_score']) if 'nri_risk_score' in points_df.columns else pd.DataFrame()\n",
|
|
" for row in nri_rows.itertuples(index=False):\n",
|
|
" color = nri_color(row.nri_risk_score)\n",
|
|
" # Scale marker by composite RISK_SCORE so higher-risk DCs visually stand out.\n",
|
|
" score = float(row.nri_risk_score)\n",
|
|
" radius = max(4, min(14, 3 + score / 8.0))\n",
|
|
" top_label = ''\n",
|
|
" top3 = top_nri_hazards(row, n=1)\n",
|
|
" if top3:\n",
|
|
" top_label = f\"; top hazard {top3[0]}\"\n",
|
|
" tip = (\n",
|
|
" f\"NRI risk {fmt_number(row.nri_risk_score, 1)} \"\n",
|
|
" f\"({clean_value(row.nri_risk_rating)}){top_label}\"\n",
|
|
" )\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=[row.latitude, row.longitude],\n",
|
|
" radius=radius,\n",
|
|
" color=color,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.4,\n",
|
|
" weight=1,\n",
|
|
" tooltip=tip,\n",
|
|
" ).add_to(nri_layer)\n",
|
|
"\n",
|
|
" bounds = []\n",
|
|
" for row in points_df.itertuples(index=False):\n",
|
|
" cluster_label, cluster_size, _ = cluster_label_and_size(row.cluster_id)\n",
|
|
" marker = folium.CircleMarker(\n",
|
|
" location=[row.latitude, row.longitude],\n",
|
|
" radius=NOISE_RADIUS if row.cluster_id == -1 else CLUSTERED_RADIUS,\n",
|
|
" color=cluster_color(row.cluster_id),\n",
|
|
" fill=True,\n",
|
|
" fill_opacity=0.75,\n",
|
|
" weight=1,\n",
|
|
" popup=point_popup(row),\n",
|
|
" tooltip=f'{cluster_label}; size={cluster_size}',\n",
|
|
" )\n",
|
|
" if row.cluster_id == -1:\n",
|
|
" marker.add_to(noise_layer)\n",
|
|
" else:\n",
|
|
" marker.add_to(clustered_layer)\n",
|
|
" bounds.append([row.latitude, row.longitude])\n",
|
|
"\n",
|
|
" for row in clusters_df.itertuples(index=False):\n",
|
|
" color = cluster_color(int(row.cluster_id))\n",
|
|
" location = [row.centroid_latitude, row.centroid_longitude]\n",
|
|
" if SHOW_CENTROID_P90_CIRCLES and pd.notna(row.radius_km_p90):\n",
|
|
" folium.Circle(\n",
|
|
" location=location,\n",
|
|
" radius=float(row.radius_km_p90) * 1000,\n",
|
|
" color=color,\n",
|
|
" fill=False,\n",
|
|
" weight=1,\n",
|
|
" opacity=0.45,\n",
|
|
" ).add_to(centroid_layer)\n",
|
|
" folium.CircleMarker(\n",
|
|
" location=location,\n",
|
|
" radius=CENTROID_RADIUS,\n",
|
|
" color=CENTROID_COLOR,\n",
|
|
" fill=True,\n",
|
|
" fill_color=color,\n",
|
|
" fill_opacity=0.95,\n",
|
|
" weight=2,\n",
|
|
" popup=centroid_popup(row),\n",
|
|
" tooltip=f'Cluster {int(row.cluster_id)} centroid; {int(row.point_count):,} points',\n",
|
|
" ).add_to(centroid_layer)\n",
|
|
"\n",
|
|
" huc8_layer.add_to(m)\n",
|
|
" state_energy_layer.add_to(m)\n",
|
|
" cables_layer.add_to(m)\n",
|
|
" opposition_layer.add_to(m)\n",
|
|
" generator_capacity_layer.add_to(m)\n",
|
|
" utility_rate_layer.add_to(m)\n",
|
|
" climate_layer.add_to(m)\n",
|
|
" broadband_layer.add_to(m)\n",
|
|
" election_2020_layer.add_to(m)\n",
|
|
" nri_layer.add_to(m)\n",
|
|
" clustered_layer.add_to(m)\n",
|
|
" noise_layer.add_to(m)\n",
|
|
" centroid_layer.add_to(m)\n",
|
|
" folium.LayerControl(collapsed=False).add_to(m)\n",
|
|
" if bounds:\n",
|
|
" m.fit_bounds(bounds, padding=(20, 20))\n",
|
|
" return m\n",
|
|
"\n",
|
|
"\n",
|
|
"cluster_map = build_cluster_map(points, clusters)\n",
|
|
"cluster_map"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "13",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Export HTML"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "14",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cluster_map.save(MAP_HTML)\n",
|
|
"print('Wrote:', MAP_HTML.resolve())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "15",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Feature Staging Area\n",
|
|
"\n",
|
|
"Use this section to prototype the next map feature before folding it into `build_cluster_map`. Good candidates:\n",
|
|
"- filters by source/operator/state/cluster size\n",
|
|
"- toggle layers for top-N clusters\n",
|
|
"- water-stress overlays on top of the HUC8 layer\n",
|
|
"- nearest generator capacity / fuel mix summaries around each DC\n",
|
|
"- opposition cases overlay refinements (a basic opposition-case layer is already drawn on the map above)\n",
|
|
"- utility-rate filters by state or service type\n",
|
|
"- cluster labels or summary panels\n",
|
|
"- downloadable GeoJSON exports\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|