cables and maps

This commit is contained in:
2026-05-17 15:32:51 -07:00
parent 3f7875084d
commit eecfa49779
12 changed files with 231292 additions and 0 deletions

View File

@@ -0,0 +1,236 @@
#!/usr/bin/env python3
"""Quick statistical analysis: are US data centers spatially tied to submarine
cables, and does the resulting pattern look like concentrated costs / dispersed
benefits?
"""
import math
import os
import statistics
from collections import Counter
import psycopg2
def connect():
    """Open a psycopg2 connection to the ``data_centers`` database.

    Host, port, user and password are taken from the ``PGWEB_*`` environment
    variables; a missing variable raises ``KeyError``.
    """
    env = os.environ
    settings = {
        "host": env["PGWEB_HOST"],
        "port": env["PGWEB_PORT"],
        "user": env["PGWEB_USER"],
        "password": env["PGWEB_PASSWORD"],
        "dbname": "data_centers",
    }
    return psycopg2.connect(**settings)
def quantiles(xs, qs=(0.1, 0.25, 0.5, 0.75, 0.9)):
    """Return ``{q: value}`` for each requested quantile of *xs*.

    Values are linearly interpolated between the two nearest order
    statistics. For empty input every quantile maps to ``None``.
    """
    ordered = sorted(xs)
    count = len(ordered)
    if count == 0:
        return {q: None for q in qs}

    result = {}
    for q in qs:
        pos = (count - 1) * q  # fractional index into the sorted data
        below = math.floor(pos)
        above = math.ceil(pos)
        if below == above:
            # pos lands exactly on an element; no interpolation needed
            result[q] = ordered[int(pos)]
            continue
        step = ordered[above] - ordered[below]
        result[q] = ordered[below] + step * (pos - below)
    return result
def gini(values):
    """Gini coefficient of the non-negative entries of *values*.

    ``None`` and negative entries are ignored. Returns ``None`` when nothing
    is left or when the remaining values sum to zero.
    """
    ranked = sorted(x for x in values if x is not None and x >= 0)
    count = len(ranked)
    total = sum(ranked)
    if count == 0 or total == 0:
        return None

    # Rank-weighted sum over the ascending values (ranks start at 1).
    weighted = 0.0
    for rank, x in enumerate(ranked, start=1):
        weighted += rank * x
    return (2 * weighted) / (count * total) - (count + 1) / count
def hhi(shares):
    """Herfindahl-Hirschman Index: the sum of squared shares.

    The result lies in [0, 1] when the shares are non-negative and sum to ~1.
    """
    squared = [share * share for share in shares]
    return sum(squared)
def mann_whitney_u_z(xs, ys):
    """Approximate Mann-Whitney U test (normal approximation, large-n).

    Tied observations receive their average rank, and the variance of U is
    reduced by the standard tie correction, so heavily tied samples do not
    get an understated |z|. No continuity correction is applied.

    Args:
        xs: First sample (a sized sequence of comparable numbers).
        ys: Second sample.

    Returns:
        ``(U, z, p_two_sided)`` where ``U`` is the statistic for *xs*.
        A positive ``z`` means *xs* tends to rank higher than *ys*.
        If either sample is empty, or every observation is tied,
        ``z`` is ``0.0`` and ``p`` is ``1.0``.
    """
    combined = [(v, 0) for v in xs] + [(v, 1) for v in ys]
    combined.sort(key=lambda t: t[0])
    n = len(combined)
    ranks = [0.0] * n
    tie_term = 0  # sum of (t^3 - t) over groups of t tied observations
    i = 0
    while i < n:
        # Extend j to the last element sharing combined[i]'s value.
        j = i
        while j + 1 < n and combined[j + 1][0] == combined[i][0]:
            j += 1
        avg_rank = (i + j) / 2 + 1  # mean of the 1-based ranks i+1 .. j+1
        for k in range(i, j + 1):
            ranks[k] = avg_rank
        t = j - i + 1
        tie_term += t ** 3 - t  # zero for untied values
        i = j + 1

    r1 = sum(r for r, (_, g) in zip(ranks, combined) if g == 0)
    n1, n2 = len(xs), len(ys)
    U1 = r1 - n1 * (n1 + 1) / 2
    mu = n1 * n2 / 2
    if n > 1:
        # Tie-corrected variance; equals n1*n2*(n+1)/12 when there are no ties.
        variance = n1 * n2 * ((n + 1) - tie_term / (n * (n - 1))) / 12
    else:
        variance = 0.0
    sigma = math.sqrt(max(variance, 0.0))  # clamp tiny negative rounding error
    z = (U1 - mu) / sigma if sigma else 0.0
    # Two-sided p via the complementary error function.
    p = math.erfc(abs(z) / math.sqrt(2))
    return U1, z, p
def main():
    """Run the cable-proximity and concentration queries and print the report.

    Pulls four datasets from PostGIS (DC-to-cable distances, city-to-cable
    distances, DC counts per state, ``logical_dominance_ips`` per US city),
    then prints quantiles, a Mann-Whitney comparison and Gini/HHI figures to
    stdout. Returns nothing.

    The connection is closed even when a query fails, and statistics that
    cannot be computed from empty result sets are reported as ``n/a`` (or
    skipped) instead of raising.
    """
    conn = connect()
    try:
        with conn.cursor() as cur:
            # --- 1. Distance from each US data center to nearest submarine cable ---
            cur.execute(
                """
                with cables_union as (
                    select ST_Union(geom)::geography as g from public.internet_cables
                )
                select ST_Distance(
                    ST_SetSRID(ST_MakePoint(dc.longitude, dc.latitude), 4326)::geography,
                    cu.g
                ) / 1000.0 -- meters -> km
                from public.us_dc_sample_geocoded dc, cables_union cu
                where dc.longitude is not null and dc.latitude is not null
                  and (dc.country = 'United States' or dc.country is null)
                """
            )
            dc_km = [float(r[0]) for r in cur.fetchall()]

            # --- 2. Distance from US city-dominance points to nearest cable ---
            cur.execute(
                """
                with cables_union as (
                    select ST_Union(geom)::geography as g from public.internet_cables
                )
                select ST_Distance(
                    ST_SetSRID(ST_MakePoint(c.longitude, c.latitude), 4326)::geography,
                    cu.g
                ) / 1000.0
                from public.internet_city_dominance c, cables_union cu
                where c.country = 'US' and c.geom is not null
                """
            )
            city_km = [float(r[0]) for r in cur.fetchall()]

            # --- 3. DC distribution by state (cost concentration) ---
            cur.execute(
                """
                select coalesce(nullif(state_code, ''), 'UNK') as st, count(*)
                from public.us_dc_sample_geocoded
                where longitude is not null and latitude is not null
                group by 1
                """
            )
            state_counts = dict(cur.fetchall())

            # --- 4. IP distribution across US cities (benefit dispersion proxy) ---
            cur.execute(
                """
                select city, coalesce(logical_dominance_ips, 0)
                from public.internet_city_dominance
                where country = 'US' and logical_dominance_ips is not null
                """
            )
            city_ips = [(r[0], int(r[1])) for r in cur.fetchall()]
    finally:
        conn.close()

    total_dc = sum(state_counts.values())
    state_shares = (
        {k: v / total_dc for k, v in state_counts.items()} if total_dc else {}
    )
    total_ips = sum(v for _, v in city_ips)
    ip_shares = [v / total_ips for _, v in city_ips] if total_ips else []

    # --- 5. Where do the people-with-IPs LIVE relative to the DCs? ---
    # Top-N US dominance cities, share of national IPs each captures.
    top_ip_cities = sorted(city_ips, key=lambda t: -t[1])[:10]

    def fmt_opt(value, spec):
        """Format *value* with *spec*; gini() may return None, shown as n/a."""
        return "n/a" if value is None else format(value, spec)

    def fmt_q(label, xs):
        """Print mean, quantiles and within-threshold shares for *xs* (km)."""
        print(f"\n{label}:")
        if not xs:
            # statistics.mean() and the share division both fail on empty data
            print(" (no rows returned)")
            return
        q = quantiles(xs)
        print(f" mean = {statistics.mean(xs):,.1f} km")
        print(f" median (p50) = {q[0.5]:,.1f} km")
        print(f" p10 / p25 / p75 / p90 = "
              f"{q[0.1]:,.1f} / {q[0.25]:,.1f} / {q[0.75]:,.1f} / {q[0.9]:,.1f} km")
        for thr in (10, 50, 100, 250):
            frac = sum(1 for x in xs if x <= thr) / len(xs)
            print(f" share within {thr:>3} km of a cable: {frac*100:5.1f}%")

    # ======= report =======
    print("=" * 70)
    print("ARE US DATA CENTERS TIED TO SUBMARINE CABLE LANDINGS?")
    print("=" * 70)
    print(f"\nN data centers analyzed: {len(dc_km):,}")
    print(f"N US city-dominance pts: {len(city_km):,}")
    fmt_q("Distance: US DATA CENTERS -> nearest submarine cable", dc_km)
    fmt_q("Distance: US POPULATION CITIES -> nearest submarine cable", city_km)

    if dc_km and city_km:
        U, z, p = mann_whitney_u_z(dc_km, city_km)
        print(f"\nMann-Whitney U (DCs vs. cities, two-sided): U={U:,.0f}, z={z:.2f}, "
              f"p≈{p:.2e}")
        dc_median = statistics.median(dc_km)
        city_median = statistics.median(city_km)
        if dc_median < city_median:
            diff = city_median - dc_median
            print(f" -> DC median is {diff:,.1f} km CLOSER to cables than city median.")
        else:
            print(" -> DCs are not closer to cables than cities.")
    else:
        print("\nMann-Whitney U skipped: one of the samples is empty.")

    print("\n" + "=" * 70)
    print("CONCENTRATION OF COSTS (data centers by state)")
    print("=" * 70)
    g_dc = gini(list(state_counts.values()))
    h_dc = hhi(list(state_shares.values()))
    print(f"States covered: {len(state_counts)}")
    print(f"Gini of DC counts across states: {fmt_opt(g_dc, '.3f')} (0=even, 1=one state takes all)")
    print(f"HHI of state shares: {h_dc:.3f} (0.18+ = highly concentrated)")
    top_states = sorted(state_shares.items(), key=lambda t: -t[1])[:8]
    cum = 0.0
    print("\nTop states by share of US data centers:")
    for st, s in top_states:
        cum += s
        print(f" {st}: {s*100:5.1f}% ({state_counts[st]:>4} DCs) cum={cum*100:5.1f}%")

    print("\n" + "=" * 70)
    print("DISPERSION OF BENEFITS (US IPs across cities)")
    print("=" * 70)
    g_ip = gini([v for _, v in city_ips])
    h_ip = hhi(ip_shares)
    print(f"US cities with IP data: {len(city_ips):,}")
    print(f"Gini of IPs across cities: {fmt_opt(g_ip, '.3f')}")
    print(f"HHI of IP shares: {h_ip:.3f}")
    cum = 0.0
    print("\nTop US cities by share of national IPs:")
    for city, ips in top_ip_cities:
        # total_ips is 0 only if every city reports 0 IPs
        s = ips / total_ips if total_ips else 0.0
        cum += s
        print(f" {str(city):<30} {s*100:5.2f}% ({ips:>11,} IPs) cum={cum*100:5.2f}%")

    print("\n" + "=" * 70)
    print("INTERPRETATION")
    print("=" * 70)
    # A lower Gini means a more even distribution, so dispersed benefits show
    # up as an IP-share Gini well BELOW the DC-state Gini.
    print(f"""
Cost concentration (DCs across states): Gini={fmt_opt(g_dc, '.3f')} HHI={h_dc:.3f}
Benefit dispersion (IPs across cities): Gini={fmt_opt(g_ip, '.3f')} HHI={h_ip:.3f}
A "concentrated costs / dispersed benefits" pattern requires:
(1) DCs cluster in a few places (high state-level Gini/HHI).
(2) Users they serve span many places (low city-level Gini/HHI, ideally).
(3) That clustering is plausibly tied to fixed infrastructure (cables).
Check signs above:
- DC location vs cable proximity: see Mann-Whitney result.
- Cost concentration: compare DC HHI to a "competitive" benchmark (~0.10).
- Benefit dispersion: IP-share Gini << DC-state Gini would corroborate
the asymmetry (benefits more evenly distributed than costs).
""")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,320 @@
#!/usr/bin/env python3
"""Tract-level analysis of the 'concentrated costs / dispersed benefits' frame
for US data-center siting.
Cost-bearing universe = tracts that host at least one DC
(public.data_center_census_tracts_2024)
Comparison universe = ACS 2024 5-yr tracts in the selected states
(census_tract_acs_2024_selected_states.csv)
"""
import csv
import math
import os
import statistics
from collections import Counter
import psycopg2
# ACS tract extract used as the comparison universe (see module docstring).
# The path is relative, so it is resolved against the current working directory.
CSV_PATH = "census_tract_acs_2024_selected_states.csv"
def connect():
    """Return a psycopg2 connection to the ``data_centers`` database.

    Credentials come from the ``PGWEB_*`` environment variables; a missing
    variable raises ``KeyError``.
    """
    env_for_param = (
        ("host", "PGWEB_HOST"),
        ("port", "PGWEB_PORT"),
        ("user", "PGWEB_USER"),
        ("password", "PGWEB_PASSWORD"),
    )
    params = {param: os.environ[var] for param, var in env_for_param}
    params["dbname"] = "data_centers"
    return psycopg2.connect(**params)
def gini(values):
    """Gini coefficient of the non-negative, non-None entries of *values*.

    Returns ``None`` when no usable values remain or they sum to zero.
    """
    ranked = sorted(x for x in values if x is not None and x >= 0)
    count = len(ranked)
    total = sum(ranked)
    if count == 0 or total == 0:
        return None
    # Each value is weighted by its 1-based rank in ascending order.
    rank_weighted = [rank * x for rank, x in enumerate(ranked, start=1)]
    return (2 * sum(rank_weighted)) / (count * total) - (count + 1) / count
def hhi(shares):
    """Herfindahl-Hirschman Index: the sum of squared shares."""
    squared = [share * share for share in shares]
    return sum(squared)
def median(xs):
    """Median of *xs* ignoring ``None`` entries; ``None`` if nothing is left."""
    present = [x for x in xs if x is not None]
    if not present:
        return None
    return statistics.median(present)
def mean(xs):
    """Arithmetic mean of *xs* ignoring ``None`` entries; ``None`` if empty."""
    present = [x for x in xs if x is not None]
    if not present:
        return None
    return statistics.mean(present)
def wmean(xs, ws):
    """Weighted mean of *xs* using weights *ws*.

    Pairs are dropped when the value is ``None`` or the weight is ``None``
    or not positive. Returns ``None`` when no pair survives.
    """
    kept = []
    for value, weight in zip(xs, ws):
        if value is None or weight is None:
            continue
        if weight > 0:
            kept.append((value, weight))
    if not kept:
        return None
    weight_sum = sum(weight for _, weight in kept)
    weighted_sum = sum(value * weight for value, weight in kept)
    return weighted_sum / weight_sum
def to_float(s):
    """Convert *s* to ``float``; return ``None`` when it cannot be parsed."""
    try:
        parsed = float(s)
    except (TypeError, ValueError):
        parsed = None
    return parsed
def to_int(s):
    """Convert *s* to ``int`` via ``float`` (truncating toward zero).

    Returns ``None`` for input that is missing, non-numeric, NaN or infinite.
    """
    try:
        return int(float(s))
    except (TypeError, ValueError, OverflowError):
        # int(float("inf")) raises OverflowError; int(float("nan")) raises
        # ValueError. Treat both like any other unparseable cell.
        return None
def main():
    """Print the tract-level concentrated-costs / dispersed-benefits report.

    Loads DC-hosting tracts and their distance to the nearest cable from
    PostGIS, loads the ACS comparison universe from ``CSV_PATH``, and prints
    four sections: cost concentration, profile of host tracts vs. peers,
    cable-adjacent vs. inland host tracts, and a broadband-subscriber proxy
    for benefit dispersion. Returns nothing.

    The connection is closed even when a query fails. If the DC table holds
    no data centers the report stops after the header, and figures that
    cannot be computed are printed as ``n/a`` instead of raising.
    """

    def opt_float(value):
        """Convert a DB value to float, keeping NULL as None."""
        return float(value) if value is not None else None

    def fmt_opt(value, spec):
        """Format *value* with *spec*, or 'n/a' when it is None."""
        return "n/a" if value is None else format(value, spec)

    conn = connect()
    try:
        with conn.cursor() as cur:
            # DC-hosting tracts (the cost-bearing universe) ----------------------
            cur.execute(
                """
                select
                    geoid,
                    statefp,
                    data_center_count,
                    population,
                    households,
                    broadband_subscription_pct,
                    median_household_income,
                    per_capita_income,
                    poverty_rate,
                    non_hispanic_white_pct,
                    non_hispanic_black_pct,
                    hispanic_latino_pct,
                    non_hispanic_asian_pct,
                    primary_industry,
                    land_area_sqm,
                    industry_information_workers,
                    industry_total_workers
                from public.data_center_census_tracts_2024
                """
            )
            dc_tracts = [
                {
                    "geoid": r[0],
                    "statefp": r[1],
                    "dc_count": r[2] or 0,
                    "pop": r[3],
                    "hh": r[4],
                    "broadband_pct": opt_float(r[5]),
                    "mhi": r[6],
                    "pci": r[7],
                    "poverty": opt_float(r[8]),
                    "white_pct": opt_float(r[9]),
                    "black_pct": opt_float(r[10]),
                    "hisp_pct": opt_float(r[11]),
                    "asian_pct": opt_float(r[12]),
                    "primary_industry": r[13],
                    "land_sqm": r[14],
                    "info_workers": r[15],
                    "total_workers": r[16],
                }
                for r in cur.fetchall()
            ]

            # Distance from each DC tract to nearest cable (km) ----------------
            cur.execute(
                """
                with cables as (select ST_Union(geom)::geography g from public.internet_cables)
                select t.geoid,
                       ST_Distance(ST_Centroid(t.geom)::geography, c.g) / 1000.0
                from public.data_center_census_tracts_2024 t, cables c
                """
            )
            dist_by_geoid = {r[0]: opt_float(r[1]) for r in cur.fetchall()}
    finally:
        conn.close()

    for t in dc_tracts:
        t["dist_km"] = dist_by_geoid.get(t["geoid"])

    # Comparison universe from the wider ACS CSV ------------------------
    universe = []
    with open(CSV_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            universe.append(
                {
                    "geoid": row["geoid"],
                    "statefp": row["statefp"],
                    "pop": to_int(row["population"]),
                    "broadband_pct": to_float(row["broadband_subscription_pct"]),
                    "mhi": to_int(row["median_household_income"]),
                    "pci": to_int(row["per_capita_income"]),
                    "poverty": to_float(row["poverty_rate"]),
                    "white_pct": to_float(row["non_hispanic_white_pct"]),
                    "black_pct": to_float(row["non_hispanic_black_pct"]),
                    "hisp_pct": to_float(row["hispanic_latino_pct"]),
                    "asian_pct": to_float(row["non_hispanic_asian_pct"]),
                }
            )
    dc_geoids = {t["geoid"] for t in dc_tracts}
    non_dc = [u for u in universe if u["geoid"] not in dc_geoids]
    # Restrict comparison to states actually represented in the DC sample
    dc_states = {t["statefp"] for t in dc_tracts}
    universe_in_dc_states = [u for u in universe if u["statefp"] in dc_states]
    non_dc_in_dc_states = [u for u in non_dc if u["statefp"] in dc_states]

    # ============== report ==============
    print("=" * 72)
    print("TRACT-LEVEL CONCENTRATED COSTS / DISPERSED BENEFITS ANALYSIS")
    print("=" * 72)
    total_dc = sum(t["dc_count"] for t in dc_tracts)
    print(f"\nDC-hosting tracts: {len(dc_tracts):,}")
    print(f"Data centers in those tracts: {total_dc:,}")
    print(f"ACS universe (selected states): {len(universe):,} tracts")
    print(f"States represented in DC sample: {len(dc_states)}")
    print(f"Universe restricted to DC states: {len(universe_in_dc_states):,} tracts")
    if not total_dc:
        # Every share below divides by total_dc; nothing meaningful to report.
        print("\nNo data centers found; skipping the analysis.")
        return

    # --- Cost concentration at the tract level ---
    print("\n" + "-" * 72)
    print("1. COST CONCENTRATION (DCs across tracts)")
    print("-" * 72)
    counts = [t["dc_count"] for t in dc_tracts]
    shares = [c / total_dc for c in counts]
    g_dc = gini(counts)
    h_dc = hhi(shares)
    print(f"Gini of DC counts across DC-hosting tracts: {fmt_opt(g_dc, '.3f')}")
    print(f"HHI of DC shares across DC-hosting tracts: {h_dc:.4f}")
    # Top 1% / 5% / 20% of tracts; each bucket holds at least one tract.
    ranked = sorted(counts, reverse=True)
    top1 = max(1, len(counts) // 100)
    top5 = max(1, len(counts) // 20)
    top20 = max(1, len(counts) // 5)
    top1_share = sum(ranked[:top1]) / total_dc * 100
    top5_share = sum(ranked[:top5]) / total_dc * 100
    top20_share = sum(ranked[:top20]) / total_dc * 100
    print(f"Top 1% of DC-hosting tracts ({top1:>3} tracts) hold "
          f"{top1_share:5.1f}% of all DCs")
    print(f"Top 5% of DC-hosting tracts ({top5:>3} tracts) hold "
          f"{top5_share:5.1f}% of all DCs")
    print(f"Top 20% of DC-hosting tracts ({top20:>3} tracts) hold "
          f"{top20_share:5.1f}% of all DCs")
    # How small a fraction of population lives in a DC tract?
    pop_dc = sum(t["pop"] or 0 for t in dc_tracts)
    pop_universe = sum(u["pop"] or 0 for u in universe_in_dc_states)
    pop_share = pop_dc / pop_universe * 100 if pop_universe else None
    print(f"\nPopulation living in a DC-hosting tract: {pop_dc:>11,}")
    print(f"Total population (DC-states ACS universe): {pop_universe:>11,}")
    if pop_share is not None:
        print(f" -> {pop_share:.2f}% of people in DC-hosting states "
              f"live in a DC-hosting tract")
    # Per-capita DC density
    if pop_dc:
        print(f" -> 1 DC per {pop_dc/total_dc:,.0f} residents in DC-hosting tracts")
    if pop_universe:
        print(f" vs. 1 DC per {pop_universe/total_dc:,.0f} residents "
              f"averaged across DC-state population")

    # --- Profile of cost-bearing communities ---
    print("\n" + "-" * 72)
    print("2. WHO BEARS THE COSTS? (ACS profile of DC tracts vs. peer tracts)")
    print("-" * 72)
    fields = [
        ("Median household income ($)", "mhi", "{:>10,.0f}"),
        ("Per-capita income ($)", "pci", "{:>10,.0f}"),
        ("Broadband subscription (%)", "broadband_pct", "{:>10,.1f}"),
        ("Poverty rate (%)", "poverty", "{:>10,.1f}"),
        ("Non-Hispanic White (%)", "white_pct", "{:>10,.1f}"),
        ("Non-Hispanic Black (%)", "black_pct", "{:>10,.1f}"),
        ("Hispanic/Latino (%)", "hisp_pct", "{:>10,.1f}"),
        ("Non-Hispanic Asian (%)", "asian_pct", "{:>10,.1f}"),
    ]
    label_w = max(len(lbl) for lbl, *_ in fields)
    print(f"{'Field':<{label_w}} {'DC tracts':>12} {'Non-DC peers':>14} "
          f"{'Δ (DC − peer)':>15}")
    for label, key, fmt in fields:
        dc_med = median([t[key] for t in dc_tracts])
        peer_med = median([u[key] for u in non_dc_in_dc_states])
        if dc_med is None or peer_med is None:
            continue
        delta = dc_med - peer_med
        cell_dc = fmt.format(dc_med)
        cell_pe = fmt.format(peer_med)
        cell_dl = fmt.format(delta)
        print(f"{label:<{label_w}} {cell_dc} {cell_pe} {cell_dl}")
    print("\nPopulation-weighted means (DC tracts):")
    pops = [t["pop"] for t in dc_tracts]
    for label, key, _ in fields:
        wm = wmean([t[key] for t in dc_tracts], pops)
        if wm is not None:
            print(f" {label:<{label_w}} {wm:>12,.1f}")
    print("\nPrimary-industry mix of DC-hosting tracts (count of tracts):")
    for industry, n in Counter(t["primary_industry"] for t in dc_tracts).most_common(10):
        print(f" {n:>4} {industry}")

    # --- Cable vs. inland subgroups ---
    print("\n" + "-" * 72)
    print("3. CABLE-ADJACENT vs. INLAND DC TRACTS")
    print("-" * 72)
    near = [t for t in dc_tracts if t.get("dist_km") is not None and t["dist_km"] <= 100]
    far = [t for t in dc_tracts if t.get("dist_km") is not None and t["dist_km"] > 100]
    print(f"≤100 km from a submarine cable: {len(near):>3} tracts, "
          f"{sum(t['dc_count'] for t in near):>4} DCs")
    print(f">100 km from a submarine cable: {len(far):>3} tracts, "
          f"{sum(t['dc_count'] for t in far):>4} DCs")
    if near and far:
        # median() returns None when a field is NULL for a whole subgroup.
        for row_label, key, spec in (
            (" Median MHI", "mhi", ">10,.0f"),
            (" Median broadband %", "broadband_pct", ">10,.1f"),
            (" Median DC count", "dc_count", ">10,.0f"),
        ):
            near_med = fmt_opt(median([t[key] for t in near]), spec)
            far_med = fmt_opt(median([t[key] for t in far]), spec)
            print(f"{row_label:<28} near={near_med} far={far_med}")

    # --- Benefit-side proxy ---
    print("\n" + "-" * 72)
    print("4. BENEFIT DISPERSION (broadband subscribers across all tracts)")
    print("-" * 72)
    # Total broadband subscribers approx = households * broadband_pct.
    # Households are not in the CSV, so estimate them as population / 2.5.
    subs = [
        (u["pop"] / 2.5) * u["broadband_pct"] / 100.0
        for u in universe_in_dc_states
        if u["pop"] and u["broadband_pct"] is not None
    ]
    total_subs = sum(subs)
    sg = gini(subs)
    sh = hhi([s / total_subs for s in subs]) if total_subs else None
    ratio = h_dc / sh if sh else None  # how many times more concentrated DCs are
    print(f"Estimated total broadband subscribers (DC states): {total_subs:>14,.0f}")
    print(f"Gini of subscribers across {len(subs):,} tracts: {fmt_opt(sg, '.3f')}")
    print(f"HHI of subscribers across tracts: {fmt_opt(sh, '.5f')}")
    # Compare to DC HHI
    print("\nSide-by-side concentration (lower = more dispersed):")
    print(f" HHI of DCs across DC-hosting tracts: {h_dc:.4f}")
    print(f" HHI of broadband subs across DC-state tracts: {fmt_opt(sh, '.5f')} "
          f"({fmt_opt(ratio, '.0f')}x more concentrated for DCs)")

    print("\n" + "=" * 72)
    print("BOTTOM LINE")
    print("=" * 72)

    def skew_word(key, above, below):
        """Describe whether the DC-tract median of *key* exceeds the peer median."""
        dc_med = median([t[key] for t in dc_tracts])
        peer_med = median([u[key] for u in non_dc_in_dc_states])
        if dc_med is None or peer_med is None:
            return "n/a"
        return above if dc_med > peer_med else below

    skew_mhi = skew_word("mhi", "wealthier", "poorer")
    skew_bb = skew_word("broadband_pct", "higher", "lower")
    print(f"""
- DCs are extremely concentrated at the tract level: top 1% of host tracts
hold {top1_share:.0f}% of all DCs; top 5% hold {top5_share:.0f}%.
- Only {fmt_opt(pop_share, '.2f')}% of residents of the DC-hosting states actually
live in a DC-hosting tract — costs (land use, power draw, water, traffic,
noise) fall on a tiny minority of communities.
- DC-hosting tracts skew {skew_mhi} and {skew_bb} broadband than peer
tracts. See deltas above for the demographic profile.
- Broadband subscribers (proxy for who consumes cloud services) are far more
evenly distributed than DCs — HHI roughly {fmt_opt(ratio, '.0f')}× lower.
That asymmetry IS the classic concentrated-cost / dispersed-benefit shape.
""")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Binary file not shown.

View File

@@ -0,0 +1,194 @@
# Data Centers, Submarine Cables, and the Concentrated-Costs / Dispersed-Benefits Frame
**Author:** David Adams · **Date:** 2026-05-17
**Data:** PostGIS `data_centers` DB — `us_dc_sample_geocoded` (1,489 DCs),
`data_center_census_tracts_2024` (611 tracts, ACS 2024 5-yr enriched),
`internet_cables` (693 cables), `internet_city_dominance` (4,552 cities),
`census_tract_acs_2024_selected_states.csv` (83,811 tracts, 46 states).
---
## 1. Are US data centers spatially tied to submarine cables?
Distance from each point to the nearest submarine cable line (km):
| Group | n | Mean | p10 | p25 | **p50** | p75 | p90 | ≤10 km | ≤50 km | ≤100 km | ≤250 km |
|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
| US data centers | 1,489 | 358.7 | 21.6 | 163.1 | **276.1** | 477.4 | 867.4 | 5.2% | 16.8% | 21.4% | 32.2% |
| US population cities | 1,291 | 339.7 | 18.7 | 61.2 | **256.1** | 528.0 | 811.0 | 6.8% | 22.5% | 31.8% | 49.5% |
Mann-Whitney U two-sided: **z = 2.66, p ≈ 0.008** — significant, but in the
*opposite* direction: DC distances rank slightly *higher* (farther from a
cable) than city distances. DCs are **not** systematically closer to cables
than ordinary US cities.
**Interpretation.** At the national level the "cables drive DC siting" story
fails. The largest clusters — Loudoun County VA (Ashburn), central
Washington, Hillsboro OR, Columbus OH, Iowa — are inland, anchored to
terrestrial fiber, cheap power, and tax incentives rather than submarine
landings. Only 21.4% of DCs sit within 100 km of any cable.
---
## 2. Cost concentration at the state level
| Measure | Value |
|---|---:|
| States covered | 46 |
| Gini of DC counts across states | 0.648 |
| HHI of state shares | 0.080 |
Top states by share of US data centers:
| State | DCs | Share | Cumulative |
|---|---:|---:|---:|
| VA | 319 | 21.4% | 21.4% |
| CA | 129 | 8.7% | 30.1% |
| TX | 120 | 8.1% | 38.1% |
| OR | 102 | 6.9% | 45.0% |
| WA | 90 | 6.0% | 51.0% |
| OH | 69 | 4.6% | 55.7% |
| AZ | 60 | 4.0% | 59.7% |
| IA | 58 | 3.9% | 63.6% |
Five states hold **half** of all US data centers.
---
## 3. Cost concentration at the tract level
Much sharper than state-level:
| Measure | Value |
|---|---:|
| DC-hosting tracts | 611 |
| DCs in those tracts | 1,489 |
| Gini of DC counts across DC-hosting tracts | 0.499 |
| HHI of DC shares across DC-hosting tracts | 0.0069 |
| **Top 1% of host tracts (6 tracts) hold** | **14.6% of all DCs** |
| Top 5% of host tracts (30 tracts) hold | 33.3% of all DCs |
| Top 20% of host tracts (122 tracts) hold | 60.6% of all DCs |
Population scaling:
| Metric | Value |
|---|---:|
| Population living in a DC-hosting tract | 2,868,863 |
| Total population (DC-state ACS universe) | 332,343,349 |
| **% of DC-host-state residents in a DC-hosting tract** | **0.86%** |
| DC density, DC-hosting tracts | 1 DC per 1,927 residents |
| DC density, DC-state average | 1 DC per 223,199 residents |
| **Per-capita DC burden, host vs. average** | **~115×** |
---
## 4. Who bears the costs? (ACS profile of DC tracts vs. peer tracts in same states)
| Field | DC tracts (median) | Non-DC peers (median) | Δ (DC − peer) |
|---|---:|---:|---:|
| Median household income ($) | 91,082 | 76,637 | **+14,446** |
| Per-capita income ($) | 48,111 | 38,546 | +9,565 |
| Broadband subscription (%) | 94.2 | 92.0 | +2.2 |
| Poverty rate (%) | 8.8 | 10.8 | −2.0 |
| Non-Hispanic White (%) | 52.4 | 64.7 | −12.3 |
| Non-Hispanic Black (%) | 6.7 | 3.9 | +2.8 |
| Hispanic/Latino (%) | 11.9 | 9.8 | +2.1 |
| Non-Hispanic Asian (%) | 5.2 | 1.5 | +3.7 |
Population-weighted means in DC tracts: MHI **$109,145**, broadband **93.2%**,
poverty 11.1%. The actual residents of host communities are concentrated in
affluent tech corridors (Loudoun, Silicon Valley, Seattle eastside,
Hillsboro OR).
Primary-industry mix of host tracts (count of tracts):
| Tracts | Primary industry |
|---:|---|
| 351 | Educational services, and health care and social assistance |
| 133 | Professional, scientific, management, administrative, and waste management services |
| 35 | Manufacturing |
| 26 | Arts, entertainment, recreation, accommodation, and food services |
| 22 | Retail trade |
| 14 | Agriculture, forestry, fishing and hunting, and mining |
| 10 | Finance and insurance, and real estate and rental and leasing |
| 9 | Construction |
| 4 | Transportation and warehousing, and utilities |
| 3 | Public administration |
---
## 5. Cable-adjacent vs. inland DC tracts
| | ≤100 km from a cable | >100 km from a cable |
|---|---:|---:|
| Tracts | 159 | 452 |
| Data centers | 319 | 1,170 |
| Median household income ($) | 106,406 | 86,289 |
| Median broadband (%) | 95.2 | 93.9 |
| Median DC count | 1 | 1 |
Inland DCs are roughly **3.7×** the cable-adjacent count. Coastal/cable
tracts skew even wealthier than inland DC tracts.
---
## 6. Benefit dispersion (broadband subscribers as a benefit proxy)
| Measure | Value |
|---|---:|
| Estimated broadband subscribers (DC states) | 119,719,313 |
| Tracts with subscriber data | 81,839 |
| Gini of subscribers across tracts | 0.253 |
| HHI of subscribers across tracts | 0.00001 |
Side-by-side concentration:
| Series | HHI |
|---|---:|
| DCs across DC-hosting tracts | 0.0069 |
| Broadband subscribers across DC-state tracts | 0.00001 |
| **Concentration ratio** | **~464× more concentrated for DCs** |
---
## 7. Verdict
| Element of the frame | Holds? |
|---|---|
| Costs concentrated geographically | **Yes** — top 6 tracts carry 15% of DCs; <1% of host-state population lives in a DC tract; per-capita burden ~115× the average. |
| Driven by submarine cable infrastructure | **No, broadly** — proximity test fails nationally; submarine cables matter for a coastal subset only. Terrestrial fiber, power, water, land, and tax incentives dominate. |
| Benefits dispersed among users | **Yes** — broadband subscribers ~464× more dispersed (by HHI) than DCs. |
| Classic political failure mode (weak losers vs. diffuse winners) | **No.** Host tracts skew higher-income, lower-poverty, and higher-broadband than peers. The cost-bearing communities are affluent tech corridors with strong bargaining capacity — they tend to convert concentrated costs into concentrated *rents* (tax base, jobs, infrastructure concessions). |
**Bottom line.** The structural asymmetry that defines "concentrated costs /
dispersed benefits" is unambiguous in the data — DC siting is hyper-local
while benefits are continental. But the predicted political dynamic doesn't
fit cleanly, because the loser side here is not weak. A more targeted test
would split host tracts into power-stressed exurban tracts (parts of
Loudoun's edges, central Oregon, Iowa) and urban-suburban tech-corridor
tracts, and look at whether the *exurban* subset shows the weak-loser
pattern (lower income, slower broadband, higher poverty than its
neighbors).
---
## Caveats
- The ACS universe is the 46 DC-host states (already DC-heavy); excludes
states with no DCs in the sample.
- `data_center_census_tracts_2024` only contains tracts that host at least
one DC, by construction.
- Broadband-subscription rate is a coarse benefit proxy; cloud services
benefit any internet user globally, not just local subscribers.
- 45 of 1,489 DCs use city-precision fallback coordinates, so a small share
of tract assignments are approximate.
- The `logical_dominance_ips` field in `internet_city_dominance` measures
IP blocks routed/hosted at each city — a supply-side measure that
duplicates the DC signal, not a demand-side user-location measure. It
was excluded from the benefit-dispersion calculation for that reason.
## Reproducible scripts
- `load_postgis_internet_cables.py` — ingest cables/landings/cities
- `make_internet_cables_map.py` — render the combined Leaflet map
- `analyze_cables_concentration.py` — state-level + cable-proximity analysis
- `analyze_dc_tract_concentration.py` — tract-level analysis used here

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
{"minYear":1858,"maxYear":2026}

View File

@@ -0,0 +1,136 @@
{
"1858": {
"description": "The first transatlantic telegraph cable was successfully laid, connecting Ireland to Canada. It ceased functioning after only three weeks."
},
"1866": {
"description": "A permanent and more reliable transatlantic telegraph cable was successfully laid, drastically reducing communication times between Europe and North America from weeks to minutes."
},
"1870": {
"description": "The first submarine cable connecting England to India, known as the Red Sea Line, was completed."
},
"1898": {
"description": "By the end of the 19th century, a sophisticated \"web\" of submarine telegraph cables linked Europe and North America."
},
"1902": {
"description": "The telegraph across the Pacific Ocean was completed, connecting the U.S. mainland to Hawaii. The trans-Pacific segment of the British \"All Red Line\" also connected Canada, Australia, New Zealand, and Fiji."
},
"1903": {
"description": "Guam was linked to the Philippines via undersea cable, further expanding the trans-Pacific network."
},
"1921": {
"description": "Executive Order No. 3513 was issued in the United States, relating to applications for submarine cable licenses. Cables continued to prosper due to their security, with speeds reaching 400 words per minute across the Atlantic by 1928 for telegraph messages."
},
"1951": {
"description": "Western Union integrated electronic amplifiers into their non-loaded cables at the edge of the continental shelf."
},
"1956": {
"description": "The first transatlantic telephone cable, TAT-1, commenced service, connecting Scotland and Newfoundland. This coaxial cable system initially carried 36 telephone channels and marked the beginning of global telephone communications via submarine cables."
},
"1959": {
"description": "TAT-2, a similarly designed transatlantic telephone cable, was laid from France to Canada. This year also saw the \"Transatlantic Cables Incident,\" where a Soviet vessel was boarded by the USS Roy O. Hale in response to cable breakages."
},
"1960": {
"description": "The 1960s marked the dawn of the satellite communication age, with the launch of Telstar 1 in 1962, the first active communications satellite. For a time, satellites became the primary choice for transoceanic communication."
},
"1963": {
"description": "The AT&T cableship CS Long Lines was launched and laid TAT-3, a single-line transatlantic cable capable of carrying 128 channels. The CS Long Lines also laid the first trans-Pacific cable (TPC-1) from Oahu, to Japan, via Midway, Wake, and Guam."
},
"1965": {
"description": "The first transistorized Atlantic telephone cable was introduced."
},
"1970": {
"description": "Communications-grade optical fiber was developed at Corning. TAT-5, utilizing transistors, went into service between New Jersey and Spain, carrying 845 channels."
},
"1976": {},
"1978": {
"description": "A significant commitment was made by AT&T, GPO, and Standard Telecommunications Laboratories for TAT-8, scheduled for a decade later, to be fiber-optic instead of coaxial."
},
"1982": {
"description": "TAT-2 was superseded by the emerging fiber optic cable technology."
},
"1983": {},
"1986": {
"description": "A fiber-optic cable was laid across the English Channel to Belgium, capable of carrying 11,500 telephone circuits on its fiber pairs."
},
"1987": {
"description": "A report on the erbium-doped fiber amplifier was released, a key development for optical communication."
},
"1988": {
"description": "TAT-8, the first trans-Atlantic fiber optic cable, entered service. This revolutionary cable had a capacity equivalent to 40,000 telephone circuits, a tenfold increase over its predecessor, marking a significant shift to fiber optics. With this advancement, submarine cables once again became superior to satellites for transoceanic communication."
},
"1989": {},
"1990": {
"description": "The internet became publicly accessible in 1989, marking the beginning of a new era for global communication."
},
"1991": {
"description": "The world's first website launched."
},
"1992": {
"description": "The CS Long Lines cableship completed its last mission, having laid cable on 23 missions, including 10 across the Pacific or Atlantic, since 1963."
},
"1993": {},
"1994": {
"description": "The first deployments of all-optical EDFA (Erbium-Doped Fiber Amplifier) submarine cable systems occurred."
},
"1995": {},
"1996": {
"description": "AT&T laid the first all-optic fiber cable across the Pacific, TPC-5CN."
},
"1997": {
"description": "The Fiber Optic Link Around the Globe (FLAG) was completed."
},
"1998": {
"description": "Between 1998 and 2003, approximately 70% of new undersea fiber-optic cable was laid in the Pacific Ocean, reflecting the growing global reach of the internet."
},
"1999": {},
"2000": {
"description": "Internet usage soared, with 361 million people (6% of the global population) online. A \"telecom crash\" occurred around 2000, as the installed and under-construction transmission capacity greatly exceeded actual traffic demand."
},
"2001": {},
"2002": {},
"2003": {},
"2004": {},
"2005": {},
"2006": {},
"2007": {},
"2008": {
"description": "Major submarine cable disruptions in the Mediterranean Sea and Middle East highlighted the vulnerability of this critical infrastructure, causing widespread internet slowdowns and outages."
},
"2009": {},
"2010": {},
"2011": {},
"2012": {},
"2013": {},
"2014": {},
"2015": {
"description": "Major technology companies like Google, Amazon, and Microsoft began directly investing in and building their own undersea cables."
},
"2016": {
"description": "TeleGeography's Submarine Cable Map showed 321 undersea cable systems globally."
},
"2017": {},
"2018": {
"description": "TeleGeography's Submarine Cable Map showed 366 undersea cable systems."
},
"2019": {
"description": "A \"building boom\" in undersea cables occurred, with approximately 378 submarine cables in service."
},
"2020": {
"description": "The COVID-19 pandemic led to a significant spike in internet traffic, with new system spending rebounding to $2.7 billion."
},
"2021": {
"description": "Global internet bandwidth continued its upward trajectory, rising by 29%."
},
"2022": {
"description": "Global internet bandwidth rose by 28%, reaching 997 terabits per second (Tbps). TeleGeography mapped 486 cable systems."
},
"2023": {
"description": "Global internet bandwidth increased by 23%, reaching 1,217 Tbps, and the number of cable systems grew to 529."
},
"2024": {
"description": "Several new submarine cable systems are set to go live, including parts of the 2Africa, IAX, IEX, and PEACE cables. Incidents off West Africa and in the Baltic Sea have raised concerns about the security of this vital infrastructure."
},
"2025": {
"description": "The global internet user base is projected to reach 5.6 billion. TeleGeography forecasts 597 cable systems and 1,712 landings, with the PEARLS branch of the 2Africa cable also expected to be operational."
}
}

View File

@@ -0,0 +1,428 @@
#!/usr/bin/env python3
"""Load internet_cables/*.json into PostGIS.
Reads:
- internet_cables/all_cables.json -> public.internet_cables (+ landing points)
- internet_cables/city_dominance_2026.json -> public.internet_city_dominance
- internet_cables/year-summaries.json -> public.internet_cable_year_summaries
- internet_cables/meta.json -> public.internet_cable_meta
Requires env vars: PGWEB_HOST, PGWEB_PORT, PGWEB_USER, PGWEB_PASSWORD.
"""
import argparse
import json
import os
import re
from decimal import Decimal
import psycopg2
from psycopg2.extras import Json, execute_values
# Default directory holding the input JSON files (overridable with --data-dir).
DATA_DIR = "internet_cables"
# Database name passed to psycopg2.connect().
DB_NAME = "data_centers"
# Schema-qualified target tables created and populated by this script.
CABLES_TABLE = "public.internet_cables"
LANDINGS_TABLE = "public.internet_cable_landing_points"
CITY_TABLE = "public.internet_city_dominance"
YEAR_TABLE = "public.internet_cable_year_summaries"
META_TABLE = "public.internet_cable_meta"
# Captures the numeric part (digits, commas, dots) of a "<number> km" fragment,
# case-insensitively; used by parse_length_km().
LENGTH_KM_RE = re.compile(r"([\d,\.]+)\s*km", re.IGNORECASE)
def parse_length_km(raw):
    """Extract a kilometre figure from free-form text such as ``"1,234 km"``.

    Args:
        raw: The raw length value. Anything that is not a non-empty string
            yields ``None``.

    Returns:
        The first ``<number> km`` figure found, as a ``Decimal`` with comma
        separators removed, or ``None`` when no parsable figure is present.
    """
    # A non-string value would make the regex search raise TypeError.
    if not raw or not isinstance(raw, str):
        return None
    match = LENGTH_KM_RE.search(raw)
    if match is None:
        return None
    try:
        return Decimal(match.group(1).replace(",", ""))
    except ArithmeticError:
        # decimal.InvalidOperation (an ArithmeticError subclass) is raised when
        # the capture is not a number, e.g. a lone "." or "1.2.3".
        return None
def to_int(value):
    """Coerce *value* to ``int``, returning ``None`` when that is not possible.

    Args:
        value: Any object; ``None`` and the empty string are treated as
            "missing".

    Returns:
        ``int(value)``, or ``None`` for missing or unconvertible input.
    """
    if value in (None, ""):
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats, which int() cannot represent.
        return None
def to_bool(value):
    """Return the truthiness of *value*, passing ``None`` through unchanged."""
    return None if value is None else bool(value)
def linestring_to_wkt(coords):
    """Format a sequence of positions as a parenthesised WKT coordinate list.

    Args:
        coords: Iterable of positions whose first two members are longitude
            and latitude. Any further members (e.g. altitude) are ignored.

    Returns:
        A string such as ``"(lon1 lat1, lon2 lat2)"``.
    """
    # Index instead of unpacking two names so that positions carrying a third
    # member do not raise ValueError; only lon/lat are emitted.
    return "(" + ", ".join(f"{pt[0]} {pt[1]}" for pt in coords) + ")"
def feature_to_multilinestring_wkt(geometry):
    """Render a GeoJSON line geometry as MULTILINESTRING WKT.

    ``LineString`` input is treated as a single-part multi-line. Parts with
    fewer than two positions are dropped. Returns ``None`` for any other
    geometry type or when no usable part remains.
    """
    kind = geometry.get("type")
    coordinates = geometry.get("coordinates") or []
    if kind == "LineString":
        lines = [coordinates]
    elif kind == "MultiLineString":
        lines = coordinates
    else:
        return None
    segments = [linestring_to_wkt(line) for line in lines if len(line) >= 2]
    if not segments:
        return None
    return "MULTILINESTRING(" + ", ".join(segments) + ")"
def create_cable_tables(cur):
    """Create the cables table, the landing-points table and their indexes.

    Statements run on *cur* in order; the cables table comes first because the
    landing-points table declares a foreign key to it.
    """
    cables_ddl = f"""
        create table {CABLES_TABLE} (
            feature_id text primary key,
            cable_id text,
            name text,
            color text,
            owners text,
            rfs_year integer,
            decommission_year integer,
            length_raw text,
            length_km numeric,
            cable_type text,
            url text,
            extra_urls jsonb,
            properties jsonb,
            geom geometry(MultiLineString, 4326)
        )
    """
    landings_ddl = f"""
        create table {LANDINGS_TABLE} (
            feature_id text references {CABLES_TABLE}(feature_id) on delete cascade,
            ordinal integer,
            landing_id text,
            name text,
            country text,
            is_tbd boolean,
            primary key (feature_id, ordinal)
        )
    """
    statements = (
        cables_ddl,
        f"create index internet_cables_geom_gix on {CABLES_TABLE} using gist (geom)",
        f"create index internet_cables_cable_id_idx on {CABLES_TABLE} (cable_id)",
        f"create index internet_cables_rfs_year_idx on {CABLES_TABLE} (rfs_year)",
        landings_ddl,
        f"create index internet_cable_landings_landing_id_idx on {LANDINGS_TABLE} (landing_id)",
        f"create index internet_cable_landings_country_idx on {LANDINGS_TABLE} (country)",
    )
    for statement in statements:
        cur.execute(statement)
def create_city_table(cur):
    """Create the city-dominance table and its indexes on *cur*.

    ``geom`` is a stored generated column built from ``longitude`` and
    ``latitude``, so inserts supply only the two coordinates.
    """
    table_ddl = f"""
        create table {CITY_TABLE} (
            id text primary key,
            city text,
            country text,
            country_name text,
            region text,
            status text,
            physical_capacity_tbps numeric,
            added_physical_capacity_tbps numeric,
            logical_dominance_ips bigint,
            top_asns jsonb,
            longitude double precision,
            latitude double precision,
            geom geometry(Point, 4326) generated always as
                (ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)) stored
        )
    """
    index_ddl = (
        f"create index internet_city_dominance_geom_gix on {CITY_TABLE} using gist (geom)",
        f"create index internet_city_dominance_country_idx on {CITY_TABLE} (country)",
    )
    cur.execute(table_ddl)
    for statement in index_ddl:
        cur.execute(statement)
def create_year_table(cur):
    """Create the per-year summary table (one description per year) on *cur*."""
    ddl = f"""
        create table {YEAR_TABLE} (
            year integer primary key,
            description text
        )
    """
    cur.execute(ddl)
def create_meta_table(cur):
    """Create the key/value metadata table on *cur*."""
    ddl = f"""
        create table {META_TABLE} (
            key text primary key,
            value text
        )
    """
    cur.execute(ddl)
def load_cables(cur, path):
    """Insert every cable feature from *path* together with its landing points.

    Args:
        cur: Open database cursor used for the bulk inserts.
        path: JSON file containing a list of GeoJSON-style feature objects.

    Returns:
        A tuple ``(cable_row_count, landing_row_count)``.
    """
    with open(path, encoding="utf-8") as fh:
        features = json.load(fh)
    cable_rows = []
    landing_rows = []
    used_feature_ids = set()
    for idx, feature in enumerate(features):
        props = feature.get("properties") or {}
        feature_id = props.get("feature_id") or props.get("id")
        if not feature_id:
            feature_id = f"legacy-{idx}"
        # feature_id is the primary key, so disambiguate any residual
        # collisions by appending -1, -2, ...
        base = feature_id
        suffix = 1
        while feature_id in used_feature_ids:
            feature_id = f"{base}-{suffix}"
            suffix += 1
        used_feature_ids.add(feature_id)
        # Length may also live in a top-level lengthKm field on legacy entries.
        length_raw = props.get("length")
        length_km = parse_length_km(length_raw)
        if length_km is None and feature.get("lengthKm") is not None:
            try:
                length_km = Decimal(str(feature["lengthKm"]))
            except ArithmeticError:
                # decimal.InvalidOperation (an ArithmeticError subclass):
                # the legacy value is not numeric, so leave the length unset.
                length_km = None
        wkt = feature_to_multilinestring_wkt(feature.get("geometry") or {})
        cable_rows.append(
            (
                feature_id,
                props.get("id"),
                props.get("name"),
                props.get("color"),
                props.get("owners"),
                to_int(props.get("rfs_year")),
                to_int(props.get("decommission_year")),
                length_raw,
                length_km,
                props.get("type"),
                props.get("url"),
                Json(props.get("extraUrls") or []),
                Json(props),
                wkt,
            )
        )
        # ordinal preserves the landing point's position within the feature
        # and forms the landing table's primary key with feature_id.
        for ordinal, lp in enumerate(props.get("landing_points") or []):
            landing_rows.append(
                (
                    feature_id,
                    ordinal,
                    lp.get("id") or None,
                    lp.get("name"),
                    lp.get("country"),
                    to_bool(lp.get("is_tbd")),
                )
            )
    execute_values(
        cur,
        f"""
        insert into {CABLES_TABLE} (
            feature_id, cable_id, name, color, owners, rfs_year, decommission_year,
            length_raw, length_km, cable_type, url, extra_urls, properties, geom
        ) values %s
        """,
        cable_rows,
        # The last placeholder is wrapped so the WKT text becomes a geometry.
        template=(
            "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
            "ST_GeomFromText(%s, 4326))"
        ),
        page_size=200,
    )
    execute_values(
        cur,
        f"""
        insert into {LANDINGS_TABLE} (feature_id, ordinal, landing_id, name, country, is_tbd)
        values %s
        """,
        landing_rows,
        page_size=500,
    )
    return len(cable_rows), len(landing_rows)
def load_city_dominance(cur, path):
    """Insert the city-dominance entries from *path*; return the row count.

    Entries without an ``id`` and repeats of an already-seen ``id`` are
    skipped, so the first occurrence of each id wins.
    """
    with open(path, encoding="utf-8") as fh:
        entries = json.load(fh)
    scalar_fields = (
        "city",
        "country",
        "country_name",
        "region",
        "status",
        "physical_capacity_tbps",
        "added_physical_capacity_tbps",
        "logical_dominance_ips",
    )
    # Keyed by id; dict insertion order keeps the rows in file order.
    records = {}
    for entry in entries:
        key = entry.get("id")
        if not key or key in records:
            continue
        # Pad so missing or short coordinate lists still yield two values.
        position = (entry.get("coordinates") or [None, None]) + [None, None]
        lon, lat = position[0], position[1]
        records[key] = (
            key,
            *(entry.get(field) for field in scalar_fields),
            Json(entry.get("top_asns") or []),
            lon,
            lat,
        )
    rows = list(records.values())
    insert_sql = f"""
        insert into {CITY_TABLE} (
            id, city, country, country_name, region, status,
            physical_capacity_tbps, added_physical_capacity_tbps,
            logical_dominance_ips, top_asns, longitude, latitude
        ) values %s
    """
    execute_values(cur, insert_sql, rows, page_size=500)
    return len(rows)
def load_year_summaries(cur, path):
    """Insert one row per year found in *path*; return the row count.

    The JSON file maps year strings to either an object with an optional
    ``description`` key or a bare value. Keys that are not integers are
    skipped.
    """
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    rows = []
    for year_key, value in data.items():
        year = to_int(year_key)
        if year is None:
            continue
        if isinstance(value, dict):
            description = value.get("description")
        elif value is None:
            # A JSON null means "no description"; str(None) would otherwise
            # store the literal text "None".
            description = None
        else:
            description = str(value)
        rows.append((year, description))
    execute_values(
        cur,
        f"insert into {YEAR_TABLE} (year, description) values %s",
        rows,
        page_size=200,
    )
    return len(rows)
def load_meta(cur, path):
    """Insert the key/value pairs from *path* as text; return the row count."""
    with open(path, encoding="utf-8") as fh:
        meta = json.load(fh)
    rows = []
    for key, value in meta.items():
        # Both columns are text, so every key and value is stringified.
        rows.append((str(key), str(value)))
    insert_sql = f"insert into {META_TABLE} (key, value) values %s"
    execute_values(cur, insert_sql, rows)
    return len(rows)
def parse_args():
    """Parse the loader's command-line options (--data-dir, --replace)."""
    cli = argparse.ArgumentParser(
        description="Load internet_cables/*.json into PostGIS."
    )
    option_specs = (
        (
            "--data-dir",
            {
                "default": DATA_DIR,
                "help": f"Directory containing the JSON files (default: {DATA_DIR})",
            },
        ),
        (
            "--replace",
            {
                "action": "store_true",
                "help": "Drop existing target tables before loading.",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def main():
    """Create the target tables and load all four JSON files in one transaction.

    Raises:
        FileNotFoundError: If any expected input file is missing.
        RuntimeError: If a target table already exists and --replace was not
            given.
        KeyError: If a required PGWEB_* environment variable is unset.
    """
    args = parse_args()
    cables_path = os.path.join(args.data_dir, "all_cables.json")
    city_path = os.path.join(args.data_dir, "city_dominance_2026.json")
    year_path = os.path.join(args.data_dir, "year-summaries.json")
    meta_path = os.path.join(args.data_dir, "meta.json")
    # Fail before touching the database if any input is missing.
    for path in (cables_path, city_path, year_path, meta_path):
        if not os.path.exists(path):
            raise FileNotFoundError(path)
    all_tables = [CABLES_TABLE, LANDINGS_TABLE, CITY_TABLE, YEAR_TABLE, META_TABLE]
    conn = psycopg2.connect(
        host=os.environ["PGWEB_HOST"],
        port=os.environ["PGWEB_PORT"],
        user=os.environ["PGWEB_USER"],
        password=os.environ["PGWEB_PASSWORD"],
        dbname=DB_NAME,
    )
    try:
        # `with conn` commits on success and rolls back on any exception, so a
        # failed load leaves no partially created tables behind.
        with conn:
            with conn.cursor() as cur:
                cur.execute("create extension if not exists postgis")
                if args.replace:
                    cur.execute(
                        f"drop table if exists {LANDINGS_TABLE}, {CABLES_TABLE}, "
                        f"{CITY_TABLE}, {YEAR_TABLE}, {META_TABLE} cascade"
                    )
                # Check every target up front, including the landing-points
                # table, so a leftover table is reported with the friendly
                # message rather than a raw "already exists" database error.
                for table in all_tables:
                    cur.execute("select to_regclass(%s)", (table,))
                    if cur.fetchone()[0] is not None:
                        raise RuntimeError(
                            f"Target table {table} already exists; rerun with --replace to overwrite."
                        )
                for creator in (
                    create_cable_tables,
                    create_city_table,
                    create_year_table,
                    create_meta_table,
                ):
                    creator(cur)
                cable_count, landing_count = load_cables(cur, cables_path)
                city_count = load_city_dominance(cur, city_path)
                year_count = load_year_summaries(cur, year_path)
                meta_count = load_meta(cur, meta_path)
                # Refresh planner statistics for the freshly loaded tables.
                for table in all_tables:
                    cur.execute(f"analyze {table}")
    finally:
        conn.close()
    print(
        f"loaded {cable_count} cables, {landing_count} landing points, "
        f"{city_count} city-dominance points, {year_count} year summaries, "
        f"{meta_count} meta rows."
    )


if __name__ == "__main__":
    main()

338
make_internet_cables_map.py Normal file
View File

@@ -0,0 +1,338 @@
#!/usr/bin/env python3
"""Render a Leaflet HTML map combining US data centers, submarine cables,
and city-level network-dominance points from PostGIS.
"""
import argparse
import json
import os
import psycopg2
# Database name passed to psycopg2.connect().
DB_NAME = "data_centers"
# Schema-qualified source tables queried to build the map layers.
DC_TABLE = "public.us_dc_sample_geocoded"
CABLES_TABLE = "public.internet_cables"
CITY_TABLE = "public.internet_city_dominance"
def connect():
    """Open a psycopg2 connection to DB_NAME using the PGWEB_* env variables."""
    env = os.environ
    settings = {
        "host": env["PGWEB_HOST"],
        "port": env["PGWEB_PORT"],
        "user": env["PGWEB_USER"],
        "password": env["PGWEB_PASSWORD"],
        "dbname": DB_NAME,
    }
    return psycopg2.connect(**settings)
def load_data_centers(conn):
    """Return every data center that has coordinates as a list of dicts.

    Text columns are coalesced to empty strings in SQL; ``lon`` and ``lat``
    are converted to ``float``.
    """
    query = f"""
        select
            id,
            coalesce(provider, ''),
            coalesce(facility_name, ''),
            coalesce(city, ''),
            coalesce(state_code, ''),
            longitude,
            latitude,
            coalesce(geocode_source, '')
        from {DC_TABLE}
        where longitude is not null and latitude is not null
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    records = []
    for ident, provider, facility, city, state, lon, lat, source in rows:
        records.append(
            {
                "id": ident,
                "provider": provider,
                "facility_name": facility,
                "city": city,
                "state_code": state,
                "lon": float(lon),
                "lat": float(lat),
                "geocode_source": source,
            }
        )
    return records
def load_cables(conn):
    """Return all cables that have a geometry as a GeoJSON FeatureCollection."""
    query = f"""
        select
            feature_id,
            coalesce(cable_id, ''),
            coalesce(name, ''),
            coalesce(color, '#888888'),
            coalesce(owners, ''),
            rfs_year,
            decommission_year,
            length_km,
            coalesce(url, ''),
            ST_AsGeoJSON(geom)
        from {CABLES_TABLE}
        where geom is not null
    """
    # Property names, in the same order as the selected non-geometry columns.
    property_names = (
        "feature_id",
        "cable_id",
        "name",
        "color",
        "owners",
        "rfs_year",
        "decommission_year",
        "length_km",
        "url",
    )
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    features = []
    for row in rows:
        *attributes, geojson_text = row
        properties = dict(zip(property_names, attributes))
        # length_km is a numeric column; convert it so json.dumps can encode it.
        if properties["length_km"] is not None:
            properties["length_km"] = float(properties["length_km"])
        features.append(
            {
                "type": "Feature",
                "geometry": json.loads(geojson_text),
                "properties": properties,
            }
        )
    return {"type": "FeatureCollection", "features": features}
def load_cities(conn, us_only=False):
    """Return city-dominance points that have a geometry as a list of dicts.

    Args:
        conn: Open database connection.
        us_only: When true, keep only rows whose ``country`` is ``'US'``.
    """
    conditions = ["geom is not null"]
    if us_only:
        conditions.append("country = 'US'")
    where = "where " + " and ".join(conditions)
    query = f"""
        select
            id,
            coalesce(city, ''),
            coalesce(country, ''),
            coalesce(country_name, ''),
            coalesce(region, ''),
            physical_capacity_tbps,
            logical_dominance_ips,
            longitude,
            latitude
        from {CITY_TABLE}
        {where}
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    cities = []
    for ident, city, country, country_name, region, tbps, ips, lon, lat in rows:
        cities.append(
            {
                "id": ident,
                "city": city,
                "country": country,
                "country_name": country_name,
                "region": region,
                "tbps": None if tbps is None else float(tbps),
                "ips": None if ips is None else int(ips),
                "lon": float(lon),
                "lat": float(lat),
            }
        )
    return cities
def render_html(data_centers, cables_geojson, cities, output_path):
    """Write a self-contained Leaflet HTML page embedding the three datasets.

    Args:
        data_centers: List of data-center dicts (id, provider, facility_name,
            city, state_code, lon, lat, geocode_source).
        cables_geojson: GeoJSON FeatureCollection of cable geometries.
        cities: List of city-dominance dicts (city, country, region, tbps,
            ips, lon, lat, ...).
        output_path: Destination file; it is overwritten if it exists.
    """
    payload = json.dumps(
        {
            "data_centers": data_centers,
            "cables": cables_geojson,
            "cities": cities,
        }
    )
    # The payload is inlined into a <script> element. A database string that
    # contains "</script>" (or "<!--") would otherwise end the script early and
    # inject markup, so emit <, > and & as JSON \uXXXX escapes. These
    # characters can only occur inside JSON strings, so the decoded data is
    # unchanged.
    for raw, escaped in (("<", "\\u003c"), (">", "\\u003e"), ("&", "\\u0026")):
        payload = payload.replace(raw, escaped)
    html = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>US Data Centers + Submarine Cables</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<style>
html, body { height: 100%; margin: 0; font-family: system-ui, -apple-system, Segoe UI, sans-serif; }
#layout { display: grid; grid-template-columns: 300px 1fr; height: 100%; }
#panel { padding: 14px; border-right: 1px solid #ddd; overflow: auto; background: #f8fafb; font-size: 13px; }
#map { height: 100%; width: 100%; }
h1 { margin: 0 0 8px; font-size: 18px; }
h2 { margin: 14px 0 6px; font-size: 13px; text-transform: uppercase; color: #555; letter-spacing: 0.04em; }
.row { display: flex; justify-content: space-between; padding: 2px 0; }
.swatch { width: 12px; height: 12px; display: inline-block; margin-right: 8px; vertical-align: middle; border: 1px solid #ccc; }
label.toggle { display: block; padding: 3px 0; cursor: pointer; }
@media (max-width: 900px) {
#layout { grid-template-columns: 1fr; grid-template-rows: 220px 1fr; }
#panel { border-right: 0; border-bottom: 1px solid #ddd; }
}
</style>
</head>
<body>
<div id="layout">
<div id="panel">
<h1>Data Centers + Cables</h1>
<div class="row"><span>Data centers</span><strong id="dcCount"></strong></div>
<div class="row"><span>Submarine cables</span><strong id="cableCount"></strong></div>
<div class="row"><span>City dominance pts</span><strong id="cityCount"></strong></div>
<h2>Layers</h2>
<label class="toggle"><input type="checkbox" id="tDc" checked> Data centers</label>
<label class="toggle"><input type="checkbox" id="tCables" checked> Submarine cables</label>
<label class="toggle"><input type="checkbox" id="tCities" checked> City dominance</label>
<h2>Data center source</h2>
<div class="row"><span><span class="swatch" style="background:#1f77b4"></span>IM3_Existing_DataCenters</span></div>
<div class="row"><span><span class="swatch" style="background:#2ca02c"></span>US Census Geocoder</span></div>
<div class="row"><span><span class="swatch" style="background:#ff7f0e"></span>Nominatim/OpenStreetMap</span></div>
<div class="row"><span><span class="swatch" style="background:#7f7f7f"></span>Other</span></div>
<h2>City dominance</h2>
<div class="row"><span><span class="swatch" style="background:#9b59b6;border-radius:50%"></span>Sized by physical Tbps</span></div>
</div>
<div id="map"></div>
</div>
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script>
const DATA = __PAYLOAD__;
function colorForSource(source) {
if (source === 'IM3_Existing_DataCenters') return '#1f77b4';
if (source === 'US Census Geocoder') return '#2ca02c';
if (source === 'Nominatim/OpenStreetMap') return '#ff7f0e';
return '#7f7f7f';
}
function esc(v) {
return String(v == null ? '' : v)
.replaceAll('&','&amp;').replaceAll('<','&lt;').replaceAll('>','&gt;')
.replaceAll('"','&quot;').replaceAll("'", '&#39;');
}
const map = L.map('map', { preferCanvas: true, worldCopyJump: true }).setView([20, -40], 3);
L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', {
maxZoom: 19,
attribution: '&copy; OpenStreetMap contributors'
}).addTo(map);
const cableLayer = L.geoJSON(DATA.cables, {
style: f => ({
color: f.properties.color || '#888',
weight: 1.4,
opacity: 0.75,
}),
onEachFeature: (feature, layer) => {
const p = feature.properties;
const yrs = [p.rfs_year, p.decommission_year].filter(Boolean).join(' ');
layer.bindPopup(`
<strong>${esc(p.name)}</strong><br>
${p.url ? `<a href="${esc(p.url)}" target="_blank" rel="noopener">${esc(p.url)}</a><br>` : ''}
Owners: ${esc(p.owners)}<br>
${yrs ? `Years: ${esc(yrs)}<br>` : ''}
${p.length_km ? `Length: ${esc(p.length_km.toLocaleString())} km<br>` : ''}
ID: ${esc(p.cable_id || p.feature_id)}
`);
},
}).addTo(map);
const cityLayer = L.layerGroup();
for (const c of DATA.cities) {
const tbps = c.tbps || 0;
const radius = Math.max(2, Math.min(18, Math.sqrt(tbps) * 1.6));
const m = L.circleMarker([c.lat, c.lon], {
radius,
color: '#6c2a86',
fillColor: '#9b59b6',
fillOpacity: 0.45,
weight: 0.8,
});
m.bindPopup(`
<strong>${esc(c.city)}</strong> (${esc(c.country)})<br>
Region: ${esc(c.region)}<br>
Physical capacity: ${esc(tbps.toFixed ? tbps.toFixed(2) : tbps)} Tbps<br>
Logical dominance IPs: ${esc(c.ips ? c.ips.toLocaleString() : '')}
`);
cityLayer.addLayer(m);
}
cityLayer.addTo(map);
const dcLayer = L.layerGroup();
const dcBounds = [];
for (const p of DATA.data_centers) {
const m = L.circleMarker([p.lat, p.lon], {
radius: 3,
color: colorForSource(p.geocode_source),
fillColor: colorForSource(p.geocode_source),
fillOpacity: 0.85,
weight: 0.8,
});
const title = p.facility_name || p.id;
const provider = p.provider || '(unknown provider)';
const cityState = [p.city, p.state_code].filter(Boolean).join(', ');
m.bindPopup(`
<strong>${esc(title)}</strong><br>
Provider: ${esc(provider)}<br>
Location: ${esc(cityState)}<br>
Source: ${esc(p.geocode_source)}
`);
dcLayer.addLayer(m);
dcBounds.push([p.lat, p.lon]);
}
dcLayer.addTo(map);
if (dcBounds.length) map.fitBounds(dcBounds, { padding: [30, 30], maxZoom: 5 });
function toggle(layer, on) {
if (on) { if (!map.hasLayer(layer)) layer.addTo(map); }
else { if (map.hasLayer(layer)) map.removeLayer(layer); }
}
document.getElementById('tDc').addEventListener('change', e => toggle(dcLayer, e.target.checked));
document.getElementById('tCables').addEventListener('change', e => toggle(cableLayer, e.target.checked));
document.getElementById('tCities').addEventListener('change', e => toggle(cityLayer, e.target.checked));
document.getElementById('dcCount').textContent = DATA.data_centers.length.toLocaleString();
document.getElementById('cableCount').textContent = DATA.cables.features.length.toLocaleString();
document.getElementById('cityCount').textContent = DATA.cities.length.toLocaleString();
</script>
</body>
</html>
"""
    # Substitute last, so nothing in the data is mistaken for template text.
    html = html.replace("__PAYLOAD__", payload)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html)
def parse_args():
    """Parse the map renderer's options (--output, --us-cities-only)."""
    parser = argparse.ArgumentParser(
        description="Render a Leaflet map combining data centers, submarine cables, and city dominance."
    )
    option_specs = (
        ("--output", {"default": "data_centers_cables_map.html"}),
        (
            "--us-cities-only",
            {
                "action": "store_true",
                "help": "Restrict the city-dominance layer to country='US'.",
            },
        ),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
def main():
    """Query the three datasets, render the HTML map and print a summary."""
    options = parse_args()
    connection = connect()
    try:
        data_centers = load_data_centers(connection)
        cable_collection = load_cables(connection)
        city_points = load_cities(connection, us_only=options.us_cities_only)
    finally:
        # The connection is only needed for the queries; close it before
        # rendering.
        connection.close()
    render_html(data_centers, cable_collection, city_points, options.output)
    summary = (
        f"wrote {len(data_centers)} data centers, "
        f"{len(cable_collection['features'])} cables, "
        f"{len(city_points)} city points -> {options.output}"
    )
    print(summary)


if __name__ == "__main__":
    main()