diff --git a/enhanced_data_center_cluster_map.ipynb b/enhanced_data_center_cluster_map.ipynb index d6b63bc..d2f76b2 100644 --- a/enhanced_data_center_cluster_map.ipynb +++ b/enhanced_data_center_cluster_map.ipynb @@ -13,13 +13,14 @@ "- Loads point and cluster summary CSVs from `output/`.\n", "- Recreates the cluster-colored Folium map.\n", "- Enriches point popups with HUC8 watershed, RUCA, tract demographics, and state energy context where available.\n", - "- Adds separate layers for clustered points, isolated/noise points, cluster centroids, HUC8 watersheds, and state IM3 projected demand.\n", + "- Adds separate layers for clustered points, isolated/noise points, cluster centroids, HUC8 watersheds, state IM3 projected demand, EIA generator capacity, and utility-rate context.\n", "- Saves a standalone HTML map to `output/enhanced_master_data_center_spatial_clusters_map.html`.\n", "\n", "Notes from `output/data_center_demographic_ruca_energy_summary.md`:\n", "- HUC8 watershed join is a recommended next step for water-context analysis.\n", "- `im3_state_projected_moderate_50` is populated and used for state projected demand context.\n", - "- `seds_state_msn_year` is checked through the state context export, but it currently has no rows, so SEDS fields are blank until that table is populated.\n" + "- `seds_state_msn_year` is checked through the state context export, but it currently has no rows, so SEDS fields are blank until that table is populated.\n", + "- EIA generator capacity uses the latest available period in `public.energy_eia_operating_generator_capacity_flat`; utility-rate context uses `public.utility_rate_tracker_2025_2028`.\n" ] }, { @@ -97,6 +98,10 @@ "SHOW_ELECTION_2020_LAYER = True\n", "SHOW_ELECTION_2024_LAYER = False\n", "SHOW_NRI_LAYER = True\n", + "SHOW_EIA_GENERATOR_CAPACITY_LAYER = True\n", + "EIA_GENERATOR_PERIOD = None # None uses the latest available EIA period.\n", + "MAX_EIA_GENERATOR_PLANTS = 2000\n", + "SHOW_UTILITY_RATE_TRACKER_LAYER = 
True\n", "\n", "OUTPUT_DIR.mkdir(exist_ok=True)\n", "print('points:', POINTS_CSV)\n", @@ -193,6 +198,9 @@ "broadband_context = pd.DataFrame()\n", "election_context = pd.DataFrame()\n", "nri_context = pd.DataFrame()\n", + "generator_capacity_plants = pd.DataFrame()\n", + "utility_rate_tracker = pd.DataFrame()\n", + "utility_rate_state_context = pd.DataFrame()\n", "\n", "\n", "def load_zsh_secrets() -> None:\n", @@ -230,6 +238,7 @@ "def load_optional_db_layers() -> None:\n", " global internet_cables_geojson, opposition_cases, drought_context, smoke_context\n", " global climate_context, broadband_context, election_context, nri_context, points\n", + " global generator_capacity_plants, utility_rate_tracker, utility_rate_state_context\n", "\n", " if not ENABLE_DB_LAYER_LOAD:\n", " print('DB layer load disabled')\n", @@ -279,6 +288,176 @@ " opposition_cases = pd.read_sql(opposition_sql, conn)\n", " print(f'opposition_cases rows: {len(opposition_cases):,}')\n", "\n", + " if SHOW_EIA_GENERATOR_CAPACITY_LAYER:\n", + " generator_sql = \"\"\"\n", + " with selected_period as (\n", + " select coalesce(%(period)s::text, max(period)) as period\n", + " from public.energy_eia_operating_generator_capacity_flat\n", + " ),\n", + " latest_generators as (\n", + " select g.*\n", + " from public.energy_eia_operating_generator_capacity_flat g\n", + " join selected_period sp on g.period = sp.period\n", + " where g.geom is not null\n", + " and g.latitude is not null\n", + " and g.longitude is not null\n", + " ),\n", + " source_capacity as (\n", + " select\n", + " plant_id,\n", + " energy_source_code,\n", + " max(energy_source_desc) as energy_source_desc,\n", + " sum(coalesce(nameplate_capacity_mw, 0)) as nameplate_capacity_mw,\n", + " count(*) as generator_count\n", + " from latest_generators\n", + " group by plant_id, energy_source_code\n", + " ),\n", + " source_rank as (\n", + " select\n", + " *,\n", + " row_number() over (\n", + " partition by plant_id\n", + " order by 
nameplate_capacity_mw desc nulls last, energy_source_code nulls last\n", + " ) as rn\n", + " from source_capacity\n", + " ),\n", + " source_mix as (\n", + " select\n", + " plant_id,\n", + " string_agg(\n", + " coalesce(energy_source_code, 'UNK') || ': ' ||\n", + " round(nameplate_capacity_mw::numeric, 1)::text || ' MW',\n", + " ', ' order by nameplate_capacity_mw desc nulls last\n", + " ) as energy_source_mix\n", + " from source_capacity\n", + " group by plant_id\n", + " ),\n", + " plant_capacity as (\n", + " select\n", + " lg.period,\n", + " lg.plant_id,\n", + " max(lg.plant_name) as plant_name,\n", + " max(lg.state_id) as state_id,\n", + " max(lg.state_name) as state_name,\n", + " string_agg(distinct nullif(lg.entity_name, ''), '; ') as entity_names,\n", + " max(lg.balancing_authority_code) as balancing_authority_code,\n", + " max(lg.balancing_authority_name) as balancing_authority_name,\n", + " avg(lg.latitude) as latitude,\n", + " avg(lg.longitude) as longitude,\n", + " sum(coalesce(lg.nameplate_capacity_mw, 0)) as nameplate_capacity_mw,\n", + " sum(coalesce(lg.net_summer_capacity_mw, 0)) as net_summer_capacity_mw,\n", + " sum(coalesce(lg.net_winter_capacity_mw, 0)) as net_winter_capacity_mw,\n", + " count(*) as generator_count\n", + " from latest_generators lg\n", + " group by lg.period, lg.plant_id\n", + " )\n", + " select\n", + " pc.*,\n", + " sr.energy_source_code as primary_energy_source_code,\n", + " sr.energy_source_desc as primary_energy_source_desc,\n", + " sm.energy_source_mix\n", + " from plant_capacity pc\n", + " left join source_rank sr on sr.plant_id = pc.plant_id and sr.rn = 1\n", + " left join source_mix sm on sm.plant_id = pc.plant_id\n", + " where pc.nameplate_capacity_mw > 0\n", + " order by pc.nameplate_capacity_mw desc nulls last\n", + " limit %(limit)s\n", + " \"\"\"\n", + " generator_capacity_plants = pd.read_sql(\n", + " generator_sql,\n", + " conn,\n", + " params={'period': EIA_GENERATOR_PERIOD, 'limit': MAX_EIA_GENERATOR_PLANTS},\n", + 
" )\n", + " period_label = (\n", + " generator_capacity_plants['period'].iloc[0]\n", + " if not generator_capacity_plants.empty and 'period' in generator_capacity_plants\n", + " else EIA_GENERATOR_PERIOD\n", + " )\n", + " print(\n", + " f'eia generator capacity plants: {len(generator_capacity_plants):,} '\n", + " f'(period {period_label}; top {MAX_EIA_GENERATOR_PLANTS:,} by nameplate MW)'\n", + " )\n", + "\n", + " if SHOW_UTILITY_RATE_TRACKER_LAYER:\n", + " utility_rate_sql = \"\"\"\n", + " select\n", + " utility_provider, state_name, state_code as utility_state_code, state_id,\n", + " service_type, customer_count, total_revenue_increase_2025_2028,\n", + " time_period, monthly_increase_amount, monthly_pct_increase_ratio,\n", + " effective_date, effective_date_raw, status, source_file\n", + " from public.utility_rate_tracker_2025_2028\n", + " order by state_code, utility_provider, service_type, effective_date\n", + " \"\"\"\n", + " utility_rate_state_sql = \"\"\"\n", + " with state_rollup as (\n", + " select\n", + " state_code as utility_state_code,\n", + " max(state_name) as utility_state_name,\n", + " count(*) as utility_rate_case_count,\n", + " count(distinct utility_provider) as utility_rate_provider_count,\n", + " count(*) filter (where lower(coalesce(service_type, '')) like 'electric%%') as utility_rate_electric_case_count,\n", + " count(*) filter (where lower(coalesce(service_type, '')) like '%%gas%%') as utility_rate_gas_case_count,\n", + " sum(coalesce(customer_count, 0)) as utility_rate_customer_count,\n", + " sum(coalesce(total_revenue_increase_2025_2028, 0)) as utility_rate_total_revenue_increase_2025_2028,\n", + " avg(monthly_increase_amount) as utility_rate_avg_monthly_increase_amount,\n", + " avg(monthly_pct_increase_ratio) as utility_rate_avg_monthly_pct_increase_ratio,\n", + " min(effective_date) as utility_rate_first_effective_date,\n", + " max(effective_date) as utility_rate_last_effective_date\n", + " from 
public.utility_rate_tracker_2025_2028\n", + " group by state_code\n", + " ),\n", + " ranked_utilities as (\n", + " select\n", + " state_code as utility_state_code,\n", + " utility_provider,\n", + " service_type,\n", + " total_revenue_increase_2025_2028,\n", + " row_number() over (\n", + " partition by state_code\n", + " order by total_revenue_increase_2025_2028 desc nulls last, utility_provider\n", + " ) as rn\n", + " from public.utility_rate_tracker_2025_2028\n", + " ),\n", + " top_utilities as (\n", + " select\n", + " utility_state_code,\n", + " string_agg(\n", + " coalesce(utility_provider, 'Unknown') || ' (' || coalesce(service_type, 'service') || ')',\n", + " '; ' order by rn\n", + " ) as utility_rate_top_utilities\n", + " from ranked_utilities\n", + " where rn <= 3\n", + " group by utility_state_code\n", + " )\n", + " select sr.*, tu.utility_rate_top_utilities\n", + " from state_rollup sr\n", + " left join top_utilities tu using (utility_state_code)\n", + " order by utility_state_code\n", + " \"\"\"\n", + " utility_rate_tracker = pd.read_sql(utility_rate_sql, conn)\n", + " utility_rate_state_context = pd.read_sql(utility_rate_state_sql, conn)\n", + " print(f'utility_rate_tracker rows: {len(utility_rate_tracker):,}')\n", + " print(f'utility_rate_state_context rows: {len(utility_rate_state_context):,}')\n", + "\n", + " if not utility_rate_state_context.empty:\n", + " if not state_energy.empty and {'state_code', 'map_latitude', 'map_longitude'}.issubset(state_energy.columns):\n", + " state_coords = state_energy[['state_code', 'map_latitude', 'map_longitude']].copy()\n", + " state_coords = state_coords.rename(columns={'state_code': 'utility_state_code'})\n", + " utility_rate_state_context = utility_rate_state_context.merge(\n", + " state_coords, on='utility_state_code', how='left'\n", + " )\n", + "\n", + " cols = [\n", + " c for c in utility_rate_state_context.columns\n", + " if c not in {'utility_state_code', 'map_latitude', 'map_longitude'}\n", + " ]\n", + " 
def fmt_pct_ratio(value, decimals=1):
    """Format a fractional ratio as a percentage string.

    The input is treated as a fraction (0.05 means 5%): it is converted to
    ``float``, multiplied by 100 and handed to ``fmt_number`` with a ``'%'``
    suffix.

    Args:
        value: Ratio to format; anything ``float()`` accepts, or a missing
            value.
        decimals: Number of decimal places passed through to ``fmt_number``.

    Returns:
        An empty string when ``pd.isna(value)`` is true, the result of
        ``clean_value(value)`` when the value cannot be converted to a float,
        otherwise the formatted percentage.
    """
    if not pd.isna(value):
        try:
            ratio = float(value)
        except (TypeError, ValueError):
            # Not numeric: fall back to the notebook's generic cleaner on the
            # untouched input (clean_value is defined elsewhere in the file).
            return clean_value(value)
        return fmt_number(ratio * 100.0, decimals, suffix='%')
    return ''
def utility_rate_color(avg_pct_ratio):
    """Pick the marker color for an average monthly rate-increase ratio.

    The ratio is a fraction (0.05 means 5%). It is scaled to a percentage and
    bucketed: >= 15 dark red, >= 10 red, >= 5 amber, anything lower blue.

    Args:
        avg_pct_ratio: Average monthly percent-increase ratio; may be missing
            or, when it arrives as text, non-numeric.

    Returns:
        A hex color string. Missing, non-numeric, and NaN inputs all map to
        the neutral gray so that bad data is never drawn as a "low increase".
    """
    no_data_color = '#94a3b8'
    if pd.isna(avg_pct_ratio):
        return no_data_color
    try:
        pct = float(avg_pct_ratio) * 100.0
    except (TypeError, ValueError):
        # Mirror fmt_pct_ratio's guarded conversion: a stray text value must
        # not abort map rendering.
        return no_data_color
    if pd.isna(pct):
        # e.g. the string 'nan' passes the first isna() check but converts to
        # NaN, which would fail every >= comparison and fall through to blue.
        return no_data_color

    # Thresholds are checked from highest to lowest; first match wins.
    buckets = (
        (15, '#7f1d1d'),
        (10, '#dc2626'),
        (5, '#f59e0b'),
    )
    for floor, color in buckets:
        if pct >= floor:
            return color
    return '#0284c7'