updated map and cluster analysis
This commit is contained in:
@@ -302,6 +302,7 @@
|
||||
"# LEFT JOIN master_data_center_spatial_clusters (c) ON master_id\n",
|
||||
"# LEFT JOIN _dc_census_tract_acs_2024 (acs) ON m.geoid = acs.geoid\n",
|
||||
"# LEFT JOIN ruca_codes_2020_tract (ruca) ON m.geoid = ruca.tract_fips_20\n",
|
||||
"# LEFT JOIN watershed_huc8 (w) ON ST_Contains(w.geom, m.geom)\n",
|
||||
"# LEFT JOIN (EIA operating generators within RADIUS_KM, latest period) aggregated per DC\n",
|
||||
"#\n",
|
||||
"# Energy aggregation: latest period, status='OP', sum of nameplate_capacity_mw\n",
|
||||
@@ -357,6 +358,8 @@
|
||||
" ruca.urban_core, ruca.urban_core_type,\n",
|
||||
" ruca.pop_density as tract_pop_density,\n",
|
||||
" ruca.land_area as tract_land_area_sqmi,\n",
|
||||
" w.huc8, w.name as huc8_name, w.states as huc8_states,\n",
|
||||
" w.areasqkm as huc8_area_sqkm,\n",
|
||||
" coalesce(en.eia_gen_count, 0) as eia_gen_count,\n",
|
||||
" coalesce(en.eia_plant_count, 0) as eia_plant_count,\n",
|
||||
" coalesce(en.eia_capacity_mw, 0) as eia_capacity_mw,\n",
|
||||
@@ -371,6 +374,7 @@
|
||||
"left join public.master_data_center_spatial_clusters c on c.master_id = m.master_id\n",
|
||||
"left join public._dc_census_tract_acs_2024 acs on acs.geoid = m.geoid\n",
|
||||
"left join public.ruca_codes_2020_tract ruca on ruca.tract_fips_20 = m.geoid\n",
|
||||
"left join public.watershed_huc8 w on m.geom is not null and st_contains(w.geom, m.geom)\n",
|
||||
"left join energy_nearby en on en.master_id = m.master_id\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
@@ -381,6 +385,7 @@
|
||||
"print('non-null geoid: ', joined_df['geoid'].notna().sum())\n",
|
||||
"print('non-null cluster_id: ', joined_df['cluster_id'].notna().sum())\n",
|
||||
"print('non-null primary_ruca: ', joined_df['primary_ruca'].notna().sum())\n",
|
||||
"print('non-null huc8: ', joined_df['huc8'].notna().sum())\n",
|
||||
"print('DCs with >=1 nearby gen: ', (joined_df['eia_gen_count'] > 0).sum())\n",
|
||||
"print(f\"median nearby capacity: {joined_df['eia_capacity_mw'].median():,.0f} MW\")\n",
|
||||
"print(f\" 90th percentile: {joined_df['eia_capacity_mw'].quantile(0.9):,.0f} MW\")\n",
|
||||
@@ -815,6 +820,102 @@
|
||||
"print(\"Non-metro DCs by operator group, sized by aggregate nearby grid capacity:\")\n",
|
||||
"hyperscaler_view\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HUC8 watershed concentration\n",
|
||||
"\n",
|
||||
"Each DC sits in exactly one HUC8 watershed (USGS 8-digit hydrologic unit, roughly subbasin scale, median area ~3,000 sq km). Watershed concentration matters because cooling water draw and wastewater discharge happen at the watershed scale, not the state scale — and a single watershed feeds finite reservoirs and aquifers.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Top watersheds by DC count, with demographic + energy context.\n",
|
||||
"watershed_summary = (\n",
|
||||
" joined_df[joined_df['huc8'].notna()]\n",
|
||||
" .groupby(['huc8', 'huc8_name', 'huc8_states'], dropna=False)\n",
|
||||
" .agg(\n",
|
||||
" dcs=('master_id', 'count'),\n",
|
||||
" states_in_dc=('state', lambda s: ','.join(sorted(s.dropna().unique()))),\n",
|
||||
" operators=('operator', lambda s: s.dropna().nunique()),\n",
|
||||
" med_pop_density=('tract_pop_density', 'median'),\n",
|
||||
" sum_eia_capacity_mw=('eia_capacity_mw', 'sum'),\n",
|
||||
" med_eia_capacity_mw=('eia_capacity_mw', 'median'),\n",
|
||||
" huc8_area_sqkm=('huc8_area_sqkm', 'first'),\n",
|
||||
" )\n",
|
||||
" .reset_index()\n",
|
||||
" .sort_values('dcs', ascending=False)\n",
|
||||
" .round(0)\n",
|
||||
")\n",
|
||||
"watershed_summary['dcs_per_1000_sqkm'] = (1000 * watershed_summary['dcs'] /\n",
|
||||
" watershed_summary['huc8_area_sqkm']).round(2)\n",
|
||||
"\n",
|
||||
"print(f\"DCs match {joined_df['huc8'].notna().sum():,} HUC8 watersheds\")\n",
|
||||
"print(f\"Distinct HUC8s holding DCs: {watershed_summary['huc8'].nunique():,}\\n\")\n",
|
||||
"print(\"Top 15 watersheds by DC count:\")\n",
|
||||
"watershed_summary.head(15).reset_index(drop=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Concentration: what share of all DCs sit in the top N watersheds?\n",
|
||||
"# (A water-scarcity concentration story — if a single basin is stressed, how\n",
|
||||
"# much of the national DC footprint feels it?)\n",
|
||||
"ws_sorted = (watershed_summary.sort_values('dcs', ascending=False)\n",
|
||||
" .reset_index(drop=True))\n",
|
||||
"ws_sorted['cumulative_dcs'] = ws_sorted['dcs'].cumsum()\n",
|
||||
"total_dcs = int(ws_sorted['dcs'].sum())\n",
|
||||
"ws_sorted['cumulative_pct'] = (100 * ws_sorted['cumulative_dcs'] / total_dcs).round(1)\n",
|
||||
"\n",
|
||||
"print('Watershed concentration of US data centers:')\n",
|
||||
"for n in [1, 2, 3, 5, 10, 15, 20, 30, 50, 100]:\n",
|
||||
" cum = int(ws_sorted.head(n)['dcs'].sum())\n",
|
||||
" print(f' Top {n:3d} watersheds: {cum:>5,} DCs ({100*cum/total_dcs:5.1f}% of all US DCs)')\n",
|
||||
"print(f'\\nTotal HUC8s with at least one DC: {len(ws_sorted)}')\n",
|
||||
"print(f'Total US HUC8 watersheds: 2,139')\n",
|
||||
"print(f'Fraction touched by any DC: {100*len(ws_sorted)/2139:.1f}%')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "24",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Non-metro watershed concentration: where are the hyperscale rural builds clustered\n",
|
||||
"# at the watershed scale?\n",
|
||||
"nm_ws = (\n",
|
||||
" joined_df[joined_df['primary_ruca'].isin([4,5,6,7,8,9,10]) & joined_df['huc8'].notna()]\n",
|
||||
" .groupby(['huc8', 'huc8_name', 'huc8_states'])\n",
|
||||
" .agg(\n",
|
||||
" dcs=('master_id', 'count'),\n",
|
||||
" states_in_dc=('state', lambda s: ','.join(sorted(s.dropna().unique()))),\n",
|
||||
" top_operators=('operator', lambda s: ','.join(sorted(s.dropna().unique())[:4])),\n",
|
||||
" med_eia_capacity_mw=('eia_capacity_mw', 'median'),\n",
|
||||
" sum_eia_hydro_mw=('eia_capacity_hydro', 'sum'),\n",
|
||||
" sum_eia_wind_mw=('eia_capacity_wind', 'sum'),\n",
|
||||
" )\n",
|
||||
" .reset_index()\n",
|
||||
" .sort_values('dcs', ascending=False)\n",
|
||||
" .round(0)\n",
|
||||
")\n",
|
||||
"print('Top non-metro watersheds (RUCA 4-10):')\n",
|
||||
"nm_ws.head(15).reset_index(drop=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
Reference in New Issue
Block a user