Add master data center merge workflow
This commit is contained in:
258
build_master_data_centers.py
Normal file
258
build_master_data_centers.py
Normal file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Build (or refresh) public.master_data_centers by merging:
|
||||
- public.us_dc_sample_geocoded (curated, attribute-rich)
|
||||
- public.osm_data_centers (OpenStreetMap features)
|
||||
|
||||
Deduplication rule (curated row wins):
|
||||
Step 1: for each curated row, find a matching OSM row by
|
||||
curated.id = osm.osm_id::text OR
|
||||
curated.nominatim_osm_id = osm.osm_id OR
|
||||
ST_DWithin(curated.geom, osm.geom, 150 m, geography)
|
||||
(when several OSM rows match: ID equality first, then Nominatim ID, then closest by sphere distance).
|
||||
Step 2: insert every curated row into master, filling NULLs from the
|
||||
matched OSM row when present. source = 'merged' if matched,
|
||||
otherwise 'curated'.
|
||||
Step 3: insert every OSM row whose osm_id was NOT matched in Step 1.
|
||||
source = 'osm'.
|
||||
|
||||
Result: every curated row appears once; OSM-only rows appear once; no row is
|
||||
emitted twice. The merge logic lives in a SQL function
|
||||
public.refresh_master_data_centers() so subsequent refreshes are one call.
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import psycopg2
|
||||
|
||||
# Name of the PostgreSQL database this script connects to.
DB_NAME = "data_centers"
# Destination table that the merge truncates and rebuilds.
MASTER_TABLE = "public.master_data_centers"
# Curated input table; every one of its rows is inserted into the master table.
CURATED_TABLE = "public.us_dc_sample_geocoded"
# OpenStreetMap input table.
OSM_TABLE = "public.osm_data_centers"
# Default spatial match radius in meters; used both as the SQL function's
# default argument and as the --radius-m command-line default.
MATCH_RADIUS_M = 150
|
||||
|
||||
|
||||
CREATE_TABLE_SQL = f"""
|
||||
create table if not exists {MASTER_TABLE} (
|
||||
master_id text primary key,
|
||||
source text not null check (source in ('curated','osm','merged')),
|
||||
curated_id text,
|
||||
osm_id text,
|
||||
name text,
|
||||
operator text,
|
||||
street_address text,
|
||||
city text,
|
||||
state text,
|
||||
postal_code text,
|
||||
country text,
|
||||
website text,
|
||||
phone text,
|
||||
power_mw numeric,
|
||||
area_sqft integer,
|
||||
nearest_airport_miles numeric,
|
||||
has_bare_metal boolean,
|
||||
has_iaas boolean,
|
||||
has_internet_exchange boolean,
|
||||
has_colocation boolean,
|
||||
certifications text,
|
||||
content_summary text,
|
||||
osm_tags jsonb,
|
||||
matched_osm_tag_passes text[],
|
||||
match_method text,
|
||||
match_distance_m numeric,
|
||||
longitude double precision not null,
|
||||
latitude double precision not null,
|
||||
geom geometry(Point, 4326)
|
||||
generated always as (ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)) stored
|
||||
);
|
||||
create index if not exists master_data_centers_geom_gix on {MASTER_TABLE} using gist (geom);
|
||||
create index if not exists master_data_centers_source_idx on {MASTER_TABLE} (source);
|
||||
create index if not exists master_data_centers_state_idx on {MASTER_TABLE} (state);
|
||||
create index if not exists master_data_centers_curated_idx on {MASTER_TABLE} (curated_id);
|
||||
create index if not exists master_data_centers_osm_idx on {MASTER_TABLE} (osm_id);
|
||||
"""
|
||||
|
||||
|
||||
# Definition of public.refresh_master_data_centers(match_radius_m).
#
# The function truncates the master table, picks at most one OSM match per
# curated row (ID equality, then Nominatim ID, then nearest within the radius),
# inserts every curated row (source 'merged' when matched, else 'curated'),
# inserts the OSM rows no curated row claimed (source 'osm'), and returns the
# per-source row counts plus the total.
#
# The scratch table is created "on commit drop", so it survives until the end
# of the calling transaction. It is therefore dropped explicitly before being
# created; otherwise a second call inside the same transaction would fail with
# "relation already exists". The drop is qualified with pg_temp so it can
# never remove a permanent table that happens to share the name.
REFRESH_FUNCTION_SQL = f"""
create or replace function public.refresh_master_data_centers(match_radius_m double precision default {MATCH_RADIUS_M})
returns table (curated_rows bigint, merged_rows bigint, osm_only_rows bigint, total_rows bigint)
language plpgsql
as $$
begin
    truncate table {MASTER_TABLE};

    -- a previous call in this same transaction may have left the scratch
    -- table behind (it is only dropped at commit)
    drop table if exists pg_temp._curated_to_osm;

    -- pick a single best OSM match for each curated row, prioritizing ID
    -- equality, then nominatim id, then closest within radius
    create temporary table _curated_to_osm on commit drop as
    with ranked as (
        select
            c.id as curated_id,
            o.id as osm_id,
            case
                when c.id = o.osm_id::text then 'id'
                when c.nominatim_osm_id = o.osm_id then 'nominatim_id'
                else 'spatial'
            end as method,
            ST_DistanceSphere(c.geom, o.geom) as dist_m,
            row_number() over (
                partition by c.id
                order by
                    case
                        when c.id = o.osm_id::text then 0
                        when c.nominatim_osm_id = o.osm_id then 1
                        else 2
                    end,
                    ST_DistanceSphere(c.geom, o.geom) asc
            ) as rn
        from {CURATED_TABLE} c
        join {OSM_TABLE} o
            on c.id = o.osm_id::text
            or c.nominatim_osm_id = o.osm_id
            or ST_DWithin(c.geom::geography, o.geom::geography, match_radius_m)
    )
    select curated_id, osm_id, method, dist_m
    from ranked
    where rn = 1;

    -- Step 1+2: insert curated rows (with OSM nulls filled where matched)
    insert into {MASTER_TABLE} (
        master_id, source, curated_id, osm_id,
        name, operator, street_address, city, state, postal_code, country,
        website, phone, power_mw, area_sqft, nearest_airport_miles,
        has_bare_metal, has_iaas, has_internet_exchange, has_colocation,
        certifications, content_summary,
        osm_tags, matched_osm_tag_passes,
        match_method, match_distance_m,
        longitude, latitude
    )
    select
        'curated/' || c.id,
        case when m.osm_id is not null then 'merged' else 'curated' end,
        c.id,
        m.osm_id,
        coalesce(c.facility_name, o.name),
        coalesce(c.provider, o.operator),
        coalesce(c.street_address, o.street_address),
        coalesce(c.city, o.city),
        coalesce(c.state_code, o.state),
        coalesce(c.postal_code, o.postal_code),
        coalesce(c.country, o.country),
        coalesce(c.url, o.website),
        coalesce(c.phone, o.phone),
        c.power_mw,
        c.area_sqft,
        c.nearest_airport_miles,
        c.has_bare_metal,
        c.has_iaas,
        c.has_internet_exchange,
        c.has_colocation,
        c.certifications,
        c.content_summary,
        o.tags,
        o.matched_tags,
        m.method,
        round(m.dist_m::numeric, 2),
        c.longitude,
        c.latitude
    from {CURATED_TABLE} c
    left join _curated_to_osm m on m.curated_id = c.id
    left join {OSM_TABLE} o on o.id = m.osm_id;

    -- Step 3: insert OSM rows that no curated row claimed
    insert into {MASTER_TABLE} (
        master_id, source, curated_id, osm_id,
        name, operator, street_address, city, state, postal_code, country,
        website, phone,
        osm_tags, matched_osm_tag_passes,
        longitude, latitude
    )
    select
        'osm/' || o.id,
        'osm',
        null,
        o.id,
        o.name,
        o.operator,
        o.street_address,
        o.city,
        o.state,
        o.postal_code,
        o.country,
        o.website,
        o.phone,
        o.tags,
        o.matched_tags,
        o.longitude,
        o.latitude
    from {OSM_TABLE} o
    where not exists (
        select 1 from _curated_to_osm m where m.osm_id = o.id
    );

    analyze {MASTER_TABLE};

    return query
    select
        count(*) filter (where source = 'curated'),
        count(*) filter (where source = 'merged'),
        count(*) filter (where source = 'osm'),
        count(*)
    from {MASTER_TABLE};
end;
$$;
"""
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line for the master-table build.

    Returns:
        Namespace with ``radius_m`` (float, defaults to MATCH_RADIUS_M) and
        ``recreate`` (bool, False unless --recreate is given).
    """
    radius_help = f"Spatial match radius in meters (default: {MATCH_RADIUS_M})."
    recreate_help = f"Drop and recreate {MASTER_TABLE} before building."

    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--radius-m", type=float, default=MATCH_RADIUS_M, help=radius_help)
    cli.add_argument("--recreate", action="store_true", help=recreate_help)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Create the master table and refresh function, run a refresh, print counts.

    Connection settings come from the PGWEB_HOST, PGWEB_PORT, PGWEB_USER and
    PGWEB_PASSWORD environment variables (a missing one raises KeyError).

    Returns:
        0 after the summary has been printed.
    """
    args = parse_args()

    connection = psycopg2.connect(
        host=os.environ["PGWEB_HOST"],
        port=os.environ["PGWEB_PORT"],
        user=os.environ["PGWEB_USER"],
        password=os.environ["PGWEB_PASSWORD"],
        dbname=DB_NAME,
    )
    try:
        # All statements share one transaction, scoped by `with connection`.
        with connection, connection.cursor() as cur:
            setup_statements = ["create extension if not exists postgis"]
            if args.recreate:
                setup_statements.append(f"drop table if exists {MASTER_TABLE} cascade")
            setup_statements.extend([CREATE_TABLE_SQL, REFRESH_FUNCTION_SQL])
            for statement in setup_statements:
                cur.execute(statement)

            # The function returns a single row holding the four counts.
            cur.execute(
                "select * from public.refresh_master_data_centers(%s)",
                (args.radius_m,),
            )
            curated, merged, osm_only, total = cur.fetchone()
    finally:
        connection.close()

    report = [
        f"master_data_centers refreshed (radius={args.radius_m} m):",
        f" curated-only rows: {curated}",
        f" merged rows (curated + OSM): {merged}",
        f" osm-only rows: {osm_only}",
        f" total: {total}",
    ]
    for line in report:
        print(line)
    return 0
|
||||
|
||||
|
||||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,8 @@ from psycopg2.extras import execute_values
|
||||
|
||||
|
||||
DB_NAME = "data_centers"
|
||||
POINT_TABLE = "public.us_dc_sample_geocoded"
|
||||
POINT_TABLE = "public.master_data_centers"
|
||||
POINT_ID_COL = "master_id"
|
||||
BOUNDARY_STAGE_TABLE = "public._dc_census_tract_boundaries_2024"
|
||||
ACS_STAGE_TABLE = "public._dc_census_tract_acs_2024"
|
||||
FINAL_TABLE = "public.data_center_census_tracts_2024"
|
||||
@@ -27,6 +28,25 @@ TRACT_ZIP_URL = (
|
||||
)
|
||||
ACS_AUDIT_CSV = Path("census_tract_acs_2024_selected_states.csv")
|
||||
|
||||
# Maps full state/territory names to their two-letter codes. Lookups are
# exact-match on the key, so the U.S. Virgin Islands appear under three
# different spellings.
STATE_NAME_TO_CODE = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "District of Columbia": "DC", "Florida": "FL", "Georgia": "GA", "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME",
    "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT", "Nebraska": "NE",
    "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM",
    "New York": "NY", "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH",
    "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI",
    "South Carolina": "SC", "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX",
    "Utah": "UT", "Vermont": "VT", "Virginia": "VA", "Washington": "WA",
    "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
    "American Samoa": "AS", "Guam": "GU", "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR", "United States Virgin Islands": "VI",
    "U.S. Virgin Islands": "VI", "Virgin Islands": "VI",
}
|
||||
|
||||
STATE_FIPS = {
|
||||
"AL": "01",
|
||||
"AK": "02",
|
||||
@@ -198,16 +218,45 @@ def connect():
|
||||
)
|
||||
|
||||
|
||||
def normalize_state(value):
    """Return the two-letter code for a raw state value, or None.

    Surrounding whitespace is ignored and two-letter codes are matched
    case-insensitively, so ``" tx "`` resolves to ``"TX"``. Anything that is
    not a known code is looked up as a full name in STATE_NAME_TO_CODE
    (exact spelling, after stripping).

    Args:
        value: State code or full state name; may be None or empty.

    Returns:
        The upper-case code, or None when the value is None, blank, or not
        recognized.
    """
    if value is None:
        return None
    text = value.strip()
    if not text:
        return None
    # Codes are checked after stripping and upper-casing so that padded or
    # lower-case codes are not rejected as unknown states.
    code = text.upper()
    if code in STATE_FIPS:
        return code
    return STATE_NAME_TO_CODE.get(text)
|
||||
|
||||
|
||||
def get_state_fips(conn):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
f"select distinct state_code from {POINT_TABLE} order by state_code"
|
||||
f"select state, count(*) from {POINT_TABLE} group by state order by state nulls last"
|
||||
)
|
||||
state_codes = [row[0] for row in cur.fetchall()]
|
||||
missing = [code for code in state_codes if code not in STATE_FIPS]
|
||||
if missing:
|
||||
raise RuntimeError(f"Missing state FIPS mappings for: {', '.join(missing)}")
|
||||
return [STATE_FIPS[code] for code in state_codes]
|
||||
rows = cur.fetchall()
|
||||
normalized_counts = {}
|
||||
null_state_count = 0
|
||||
unknown = []
|
||||
for raw, count in rows:
|
||||
if raw is None:
|
||||
null_state_count += count
|
||||
continue
|
||||
code = normalize_state(raw)
|
||||
if code is None:
|
||||
unknown.append((raw, count))
|
||||
continue
|
||||
normalized_counts[code] = normalized_counts.get(code, 0) + count
|
||||
if unknown:
|
||||
details = ", ".join(f"{repr(name)}({n})" for name, n in unknown)
|
||||
raise RuntimeError(f"Unrecognized state values in {POINT_TABLE}: {details}")
|
||||
if null_state_count:
|
||||
print(
|
||||
f"warning: {null_state_count} master_data_centers rows have NULL state; "
|
||||
f"importing tract boundaries for all 50 states + DC + PR so spatial join can resolve them."
|
||||
)
|
||||
# Census ACS 5-year DP profile lacks coverage for the small island territories;
|
||||
# restrict to the 50 states + DC + PR which the ACS profile reliably serves.
|
||||
allowed = {"AS", "GU", "MP", "VI"}
|
||||
return sorted({fips for code, fips in STATE_FIPS.items() if code not in allowed})
|
||||
return sorted({STATE_FIPS[code] for code in normalized_counts})
|
||||
|
||||
|
||||
def ensure_final_table_absent(conn):
|
||||
@@ -290,8 +339,20 @@ def fetch_acs_for_state(state_fips):
|
||||
f"https://api.census.gov/data/{ACS_YEAR}/acs/acs5/profile?"
|
||||
+ urllib.parse.urlencode(params)
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=120) as response:
|
||||
data = json.loads(response.read().decode("utf-8"))
|
||||
body = response.read().decode("utf-8")
|
||||
except urllib.error.HTTPError as exc:
|
||||
body = exc.read().decode("utf-8", errors="replace")
|
||||
raise RuntimeError(
|
||||
f"Census ACS request failed for state {state_fips}: HTTP {exc.code} — {body[:300]}"
|
||||
) from exc
|
||||
try:
|
||||
data = json.loads(body)
|
||||
except json.JSONDecodeError as exc:
|
||||
raise RuntimeError(
|
||||
f"Census ACS returned non-JSON for state {state_fips}: {body[:300]}"
|
||||
) from exc
|
||||
|
||||
header = data[0]
|
||||
rows = []
|
||||
@@ -444,12 +505,15 @@ def create_final_table(conn):
|
||||
select
|
||||
t.geoid,
|
||||
count(*)::integer as data_center_count,
|
||||
count(*) filter (where dc.geocode_precision = 'address_range')::integer
|
||||
as address_range_data_center_count,
|
||||
count(*) filter (where dc.geocode_precision = 'city')::integer
|
||||
as city_precision_data_center_count,
|
||||
array_agg(dc.id order by dc.id) as data_center_ids,
|
||||
array_agg(distinct dc.provider order by dc.provider) as providers
|
||||
count(*) filter (where dc.source = 'curated')::integer
|
||||
as curated_only_data_center_count,
|
||||
count(*) filter (where dc.source = 'merged')::integer
|
||||
as merged_data_center_count,
|
||||
count(*) filter (where dc.source = 'osm')::integer
|
||||
as osm_only_data_center_count,
|
||||
array_agg(dc.{POINT_ID_COL} order by dc.{POINT_ID_COL}) as data_center_ids,
|
||||
array_agg(distinct dc.operator) filter (where dc.operator is not null)
|
||||
as operators
|
||||
from {BOUNDARY_STAGE_TABLE} t
|
||||
join {POINT_TABLE} dc
|
||||
on t.geom && dc.geom
|
||||
@@ -469,10 +533,11 @@ def create_final_table(conn):
|
||||
'{ACS_SOURCE}'::text as acs_source,
|
||||
a.acs_name,
|
||||
d.data_center_count,
|
||||
d.address_range_data_center_count,
|
||||
d.city_precision_data_center_count,
|
||||
d.curated_only_data_center_count,
|
||||
d.merged_data_center_count,
|
||||
d.osm_only_data_center_count,
|
||||
d.data_center_ids,
|
||||
d.providers,
|
||||
d.operators,
|
||||
a.population,
|
||||
a.median_age,
|
||||
a.households,
|
||||
@@ -532,7 +597,7 @@ def create_final_table(conn):
|
||||
cur.execute(
|
||||
f"""
|
||||
comment on table {FINAL_TABLE} is
|
||||
'Census tracts containing records from public.us_dc_sample_geocoded, enriched with ACS 2024 5-year profile demographics and derived primary industry fields.'
|
||||
'Census tracts containing records from public.master_data_centers (curated + OSM merged), enriched with ACS 2024 5-year profile demographics and derived primary industry fields.'
|
||||
"""
|
||||
)
|
||||
cur.execute(f"analyze {FINAL_TABLE}")
|
||||
@@ -550,7 +615,7 @@ def assign_point_geoids(conn):
|
||||
set geoid = matched.geoid
|
||||
from (
|
||||
select
|
||||
dc_inner.id,
|
||||
dc_inner.{POINT_ID_COL} as point_id,
|
||||
(
|
||||
select t.geoid
|
||||
from {BOUNDARY_STAGE_TABLE} t
|
||||
@@ -561,11 +626,11 @@ def assign_point_geoids(conn):
|
||||
) as geoid
|
||||
from {POINT_TABLE} dc_inner
|
||||
) matched
|
||||
where dc.id = matched.id
|
||||
where dc.{POINT_ID_COL} = matched.point_id
|
||||
"""
|
||||
)
|
||||
cur.execute(
|
||||
f"create index if not exists us_dc_sample_geocoded_geoid_idx on {POINT_TABLE} (geoid)"
|
||||
f"create index if not exists master_data_centers_geoid_idx on {POINT_TABLE} (geoid)"
|
||||
)
|
||||
cur.execute(f"analyze {POINT_TABLE}")
|
||||
|
||||
@@ -586,13 +651,21 @@ def validate(conn):
|
||||
total_points = cur.fetchone()[0]
|
||||
cur.execute(
|
||||
f"""
|
||||
select geocode_precision, count(*)::integer
|
||||
select source, count(*)::integer
|
||||
from {POINT_TABLE}
|
||||
group by geocode_precision
|
||||
order by geocode_precision
|
||||
group by source
|
||||
order by source
|
||||
"""
|
||||
)
|
||||
point_precision = cur.fetchall()
|
||||
point_source_breakdown = cur.fetchall()
|
||||
cur.execute(
|
||||
f"""
|
||||
select count(*)::integer
|
||||
from {POINT_TABLE}
|
||||
where geoid is null
|
||||
"""
|
||||
)
|
||||
unassigned_points = cur.fetchone()[0]
|
||||
cur.execute(
|
||||
f"""
|
||||
select count(*)::integer
|
||||
@@ -601,7 +674,7 @@ def validate(conn):
|
||||
"""
|
||||
)
|
||||
missing_acs = cur.fetchone()[0]
|
||||
return summary, total_points, point_precision, missing_acs
|
||||
return summary, total_points, point_source_breakdown, unassigned_points, missing_acs
|
||||
|
||||
|
||||
def main():
|
||||
@@ -638,7 +711,7 @@ def main():
|
||||
load_acs_stage(conn, acs_rows, acs_fieldnames)
|
||||
create_final_table(conn)
|
||||
assign_point_geoids(conn)
|
||||
summary, total_points, point_precision, missing_acs = validate(conn)
|
||||
summary, total_points, point_source_breakdown, unassigned_points, missing_acs = validate(conn)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
@@ -649,7 +722,8 @@ def main():
|
||||
summary[0], summary[1], summary[2], total_points
|
||||
)
|
||||
)
|
||||
print("point_precision=" + ", ".join(f"{k}:{v}" for k, v in point_precision))
|
||||
print("point_source=" + ", ".join(f"{k}:{v}" for k, v in point_source_breakdown))
|
||||
print(f"points_unassigned_to_tract={unassigned_points}")
|
||||
print(f"tracts_missing_acs_population={missing_acs}")
|
||||
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
376
load_postgis_osm_data_centers.py
Normal file
376
load_postgis_osm_data_centers.py
Normal file
@@ -0,0 +1,376 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fetch US data centers from OpenStreetMap (Overpass API) and load them into
|
||||
public.osm_data_centers in the data_centers database. Also (re)creates a
|
||||
unioned view public.data_centers_union combining OSM + curated rows from
|
||||
public.us_dc_sample_geocoded.
|
||||
|
||||
Two Overpass passes are made because tagging is inconsistent:
|
||||
1) telecom=data_center
|
||||
2) building=data_center
|
||||
|
||||
Results are deduplicated by (osm_type, osm_id); the matched tag-pass is recorded
|
||||
in matched_tags so we can see which query found each feature.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import psycopg2
|
||||
import requests
|
||||
from psycopg2.extras import Json, execute_values
|
||||
|
||||
# Overpass API endpoint the queries are POSTed to.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Table that receives the OSM features.
TABLE = "public.osm_data_centers"
# View that unions curated rows with OSM rows.
VIEW = "public.data_centers_union"
# Curated table read by that view.
CURATED_TABLE = "public.us_dc_sample_geocoded"
# Database name passed to psycopg2.connect.
DB_NAME = "data_centers"

# Tag passes: (key, value). One Overpass query is issued per entry, and the
# "key=value" string is recorded in each matching row's matched_tags.
TAG_PASSES = [
    ("telecom", "data_center"),
    ("building", "data_center"),
]
|
||||
|
||||
|
||||
def overpass_query(tag_key: str, tag_value: str, timeout: int = 180) -> str:
    """Build the Overpass QL query for one tag pass.

    The query selects nodes, ways and relations carrying ``tag_key=tag_value``
    inside the area tagged ISO3166-1=US / admin_level=2, and asks for tags
    plus a center point.

    Args:
        tag_key: OSM tag key to filter on.
        tag_value: OSM tag value to filter on.
        timeout: Value written into the query's ``[timeout:...]`` setting.

    Returns:
        The query text with no leading or trailing whitespace.
    """
    tag_filter = f'["{tag_key}"="{tag_value}"](area.us);'
    statements = [
        f"[out:json][timeout:{timeout}];",
        'area["ISO3166-1"="US"][admin_level=2]->.us;',
        "(",
        f"node{tag_filter}",
        f"way{tag_filter}",
        f"relation{tag_filter}",
        ");",
        "out center tags;",
    ]
    return "\n".join(statements)
|
||||
|
||||
|
||||
def fetch_pass(tag_key: str, tag_value: str, cache_path: Optional[str]) -> List[dict]:
    """Return the Overpass elements for one tag pass, using a file cache.

    When ``cache_path`` is given and the file exists, the cached JSON is used
    and no request is made. Otherwise the query is POSTed to OVERPASS_URL and
    the decoded response is written to ``cache_path`` (if given).

    A response that carries a ``remark`` entry is reported on stderr and is
    NOT cached. Overpass uses that field to report runtime errors such as
    timeouts while still answering HTTP 200, and caching it would make every
    later run silently reuse an incomplete result.

    Args:
        tag_key: OSM tag key for this pass.
        tag_value: OSM tag value for this pass.
        cache_path: File to read/write the raw response, or None to disable
            caching.

    Returns:
        The list under the payload's "elements" key (empty if absent).

    Raises:
        requests.HTTPError: via ``raise_for_status`` on an error status.
    """
    if cache_path and os.path.exists(cache_path):
        print(f" using cached response: {cache_path}")
        with open(cache_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
    else:
        query = overpass_query(tag_key, tag_value)
        print(f" querying Overpass for {tag_key}={tag_value} ...")
        headers = {
            "User-Agent": "us-data-centers-inventory/1.0 (research; contact david@dadams.io)",
            "Accept": "application/json",
        }
        resp = requests.post(
            OVERPASS_URL,
            data={"data": query},
            headers=headers,
            timeout=240,
        )
        if resp.status_code != 200:
            # Show the start of the body before raising; it usually names the cause.
            print(f" Overpass returned {resp.status_code}: {resp.text[:500]}")
            resp.raise_for_status()
        payload = resp.json()
        remark = payload.get("remark")
        if remark:
            print(
                f" warning: Overpass reported: {remark} (response not cached)",
                file=sys.stderr,
            )
        elif cache_path:
            with open(cache_path, "w", encoding="utf-8") as fh:
                json.dump(payload, fh)
            print(f" cached to {cache_path}")
    elements = payload.get("elements", [])
    print(f" pass returned {len(elements)} elements")
    return elements
|
||||
|
||||
|
||||
def element_coords(elem: dict) -> Tuple[Optional[float], Optional[float]]:
    """Return ``(lon, lat)`` for an Overpass element.

    Nodes carry their coordinates directly; every other element type is read
    from its "center" entry. Missing values come back as None.
    """
    if elem.get("type") == "node":
        holder = elem
    else:
        # A missing or null "center" falls back to an empty mapping.
        holder = elem.get("center") or {}
    return holder.get("lon"), holder.get("lat")
|
||||
|
||||
|
||||
def normalize_element(elem: dict, matched_tag: str) -> Optional[dict]:
    """Convert one Overpass element into a row dict for the OSM table.

    Args:
        elem: Raw element from the Overpass response.
        matched_tag: "key=value" label of the pass that returned the element;
            stored as the single entry of "matched_tags".

    Returns:
        The row dict, or None when the element has no coordinates, no type,
        or no id.
    """
    lon, lat = element_coords(elem)
    kind = elem.get("type")
    ident = elem.get("id")
    if any(item is None for item in (lon, lat, kind, ident)):
        return None

    tags = elem.get("tags") or {}
    lookup = tags.get
    osm_number = int(ident)
    # House number and street are joined with a space; an empty result is None.
    address_parts = (lookup("addr:housenumber"), lookup("addr:street"))
    street_address = " ".join(piece for piece in address_parts if piece) or None

    return {
        "id": f"{kind}/{ident}",
        "osm_type": kind,
        "osm_id": osm_number,
        "name": lookup("name"),
        "operator": lookup("operator"),
        "operator_type": lookup("operator:type"),
        "telecom": lookup("telecom"),
        "building": lookup("building"),
        "power": lookup("power"),
        # Plain tags win over their contact:* variants.
        "website": lookup("website") or lookup("contact:website"),
        "phone": lookup("phone") or lookup("contact:phone"),
        "street_address": street_address,
        "city": lookup("addr:city"),
        "state": lookup("addr:state"),
        "postal_code": lookup("addr:postcode"),
        "country": lookup("addr:country") or "US",
        "matched_tags": [matched_tag],
        "tags": tags,
        "longitude": float(lon),
        "latitude": float(lat),
    }
|
||||
|
||||
|
||||
def merge_records(existing: Dict[str, dict], new_rows: List[dict]) -> None:
    """Fold ``new_rows`` into ``existing`` in place, keyed by each row's "id".

    A row with an unseen id is stored as-is (the same dict object). For an id
    already present, the "matched_tags" lists are concatenated with duplicates
    removed in first-seen order, and every other field is copied over only
    when the stored value is None or "" and the incoming one is neither.
    """
    blank = (None, "")
    for incoming in new_rows:
        record_id = incoming["id"]
        stored = existing.get(record_id)
        if stored is None:
            existing[record_id] = incoming
            continue

        # dict.fromkeys de-duplicates while keeping first-seen order.
        combined = stored["matched_tags"] + incoming["matched_tags"]
        stored["matched_tags"] = list(dict.fromkeys(combined))

        for field, value in incoming.items():
            if field == "matched_tags":
                continue
            if stored.get(field) in blank and value not in blank:
                stored[field] = value
|
||||
|
||||
|
||||
# Column names for the insert statement, in order. row_to_tuple() must emit
# its values in exactly this order.
COLUMNS = [
    "id",
    "osm_type",
    "osm_id",
    "name",
    "operator",
    "operator_type",
    "telecom",
    "building",
    "power",
    "website",
    "phone",
    "street_address",
    "city",
    "state",
    "postal_code",
    "country",
    "matched_tags",
    "tags",
    "longitude",
    "latitude",
]
|
||||
|
||||
|
||||
def row_to_tuple(row: dict) -> tuple:
    """Flatten a row dict into a tuple of insert values.

    "id", "osm_type", "osm_id", "longitude" and "latitude" are required
    (KeyError if absent). The text fields default to None, "matched_tags" to
    an empty list, and "tags" is wrapped in psycopg2's Json adapter.
    """
    nullable_fields = (
        "name",
        "operator",
        "operator_type",
        "telecom",
        "building",
        "power",
        "website",
        "phone",
        "street_address",
        "city",
        "state",
        "postal_code",
        "country",
    )
    identity = (row["id"], row["osm_type"], row["osm_id"])
    attributes = tuple(row.get(field) for field in nullable_fields)
    trailer = (
        row.get("matched_tags", []),
        Json(row.get("tags", {})),
        row["longitude"],
        row["latitude"],
    )
    return identity + attributes + trailer
|
||||
|
||||
|
||||
def create_table(cur) -> None:
    """Create the OSM table and its three indexes using cursor ``cur``.

    The statements have no "if not exists" clause, so this is meant to be
    called only when the table is absent.
    """
    table_ddl = f"""
        create table {TABLE} (
            id text primary key,
            osm_type text not null,
            osm_id bigint not null,
            name text,
            operator text,
            operator_type text,
            telecom text,
            building text,
            power text,
            website text,
            phone text,
            street_address text,
            city text,
            state text,
            postal_code text,
            country text,
            matched_tags text[] not null default '{{}}',
            tags jsonb not null default '{{}}'::jsonb,
            longitude double precision not null,
            latitude double precision not null,
            ingested_at timestamptz not null default now(),
            geom geometry(Point, 4326) generated always as
                (ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)) stored
        )
        """
    index_ddl = (
        f"create index osm_data_centers_geom_gix on {TABLE} using gist (geom)",
        f"create index osm_data_centers_state_idx on {TABLE} (state)",
        f"create index osm_data_centers_tags_gin on {TABLE} using gin (tags)",
    )
    cur.execute(table_ddl)
    for statement in index_ddl:
        cur.execute(statement)
|
||||
|
||||
|
||||
def insert_values(cur, rows: List[dict], upsert: bool) -> None:
    """Bulk-insert ``rows`` into the OSM table in pages of 200.

    Args:
        cur: Open database cursor.
        rows: Row dicts accepted by row_to_tuple().
        upsert: When true, an id conflict overwrites every non-id column with
            the incoming value and sets ingested_at to now().
    """
    column_list = ", ".join(COLUMNS)
    statement = f"insert into {TABLE} ({column_list}) values %s"
    if upsert:
        set_clause = ", ".join(
            f"{name} = excluded.{name}" for name in COLUMNS if name != "id"
        )
        statement = (
            f"{statement} on conflict (id) do update set {set_clause}, "
            "ingested_at = now()"
        )
    values = [row_to_tuple(record) for record in rows]
    execute_values(cur, statement, values, page_size=200)
|
||||
|
||||
|
||||
def create_or_replace_view(cur) -> None:
    """Create or replace the view that unions curated and OSM rows.

    Curated ids are prefixed with 'curated/' and the curated columns
    facility_name, provider, state_code and url are exposed as name,
    operator, state and website so both branches share one column list.
    The branches are combined with UNION ALL, so nothing is de-duplicated.
    """
    cur.execute(
        f"""
        create or replace view {VIEW} as
        select
            'curated/' || id as id,
            'curated'::text as source,
            facility_name as name,
            provider as operator,
            street_address,
            city,
            state_code as state,
            postal_code,
            country,
            url as website,
            phone,
            longitude,
            latitude,
            geom
        from {CURATED_TABLE}
        union all
        select
            id,
            'osm'::text as source,
            name,
            operator,
            street_address,
            city,
            state,
            postal_code,
            country,
            website,
            phone,
            longitude,
            latitude,
            geom
        from {TABLE}
        """
    )
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line for the OSM loader.

    Returns:
        Namespace with ``cache_dir`` (str), ``no_cache`` (bool),
        ``recreate`` (bool), ``upsert`` (bool, True unless --no-upsert is
        given) and ``skip_view`` (bool).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--cache-dir",
        default="output",
        help="Directory to cache raw Overpass responses (default: output/).",
    )
    parser.add_argument(
        "--no-cache",
        action="store_true",
        help="Do not read or write Overpass cache files; always hit the API.",
    )
    parser.add_argument(
        "--recreate",
        action="store_true",
        help=f"Drop and recreate {TABLE} before loading.",
    )
    # --upsert is store_true with default=True, so on its own it could never
    # be switched off; --no-upsert writes False to the same destination.
    upsert_group = parser.add_mutually_exclusive_group()
    upsert_group.add_argument(
        "--upsert",
        dest="upsert",
        action="store_true",
        default=True,
        help="On id conflicts, update the existing row (default: on).",
    )
    upsert_group.add_argument(
        "--no-upsert",
        dest="upsert",
        action="store_false",
        help="Plain insert; an id conflict raises instead of updating the row.",
    )
    parser.add_argument(
        "--skip-view",
        action="store_true",
        help=f"Do not create/replace the unioned view {VIEW}.",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Fetch every tag pass, merge the results, and load them into the OSM table.

    The cache directory is created only when caching is enabled, so
    --no-cache leaves the filesystem untouched. Connection settings come from
    the PGWEB_HOST, PGWEB_PORT, PGWEB_USER and PGWEB_PASSWORD environment
    variables (a missing one raises KeyError).

    Returns:
        0 on success, 1 when no features were fetched (nothing is written to
        the database in that case).
    """
    args = parse_args()

    use_cache = not args.no_cache
    if use_cache:
        os.makedirs(args.cache_dir, exist_ok=True)

    merged: Dict[str, dict] = {}
    for tag_key, tag_value in TAG_PASSES:
        cache_path = (
            os.path.join(args.cache_dir, f"overpass_{tag_key}_{tag_value}.json")
            if use_cache
            else None
        )
        print(f"Pass: {tag_key}={tag_value}")
        elements = fetch_pass(tag_key, tag_value, cache_path)
        normalized = [
            row for row in (normalize_element(e, f"{tag_key}={tag_value}") for e in elements)
            if row is not None
        ]
        print(f" normalized {len(normalized)} rows with coords")
        merge_records(merged, normalized)
        # be polite to Overpass between passes
        time.sleep(2)

    rows = list(merged.values())
    print(f"Total deduped OSM data-center features: {len(rows)}")
    if not rows:
        print("No rows fetched; aborting DB load.", file=sys.stderr)
        return 1

    conn = psycopg2.connect(
        host=os.environ["PGWEB_HOST"],
        port=os.environ["PGWEB_PORT"],
        user=os.environ["PGWEB_USER"],
        password=os.environ["PGWEB_PASSWORD"],
        dbname=DB_NAME,
    )
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute("create extension if not exists postgis")
                if args.recreate:
                    cur.execute(f"drop table if exists {TABLE} cascade")
                # to_regclass returns NULL when the table does not exist.
                cur.execute("select to_regclass(%s)", (TABLE,))
                if cur.fetchone()[0] is None:
                    create_table(cur)
                insert_values(cur, rows, upsert=args.upsert)
                cur.execute(f"analyze {TABLE}")
                if not args.skip_view:
                    # The view reads the curated table, so it is only built
                    # when that table is present.
                    cur.execute("select to_regclass(%s)", (CURATED_TABLE,))
                    if cur.fetchone()[0] is not None:
                        create_or_replace_view(cur)
                        print(f"View {VIEW} (re)created.")
                    else:
                        print(
                            f"Skipping view: {CURATED_TABLE} does not exist.",
                            file=sys.stderr,
                        )
                cur.execute(f"select count(*) from {TABLE}")
                total = cur.fetchone()[0]
    finally:
        conn.close()

    print(f"Loaded {len(rows)} rows into {TABLE}; table now has {total} rows total.")
    return 0
|
||||
|
||||
|
||||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -8,7 +8,7 @@ import psycopg2
|
||||
|
||||
|
||||
DB_NAME = "data_centers"
|
||||
POINT_TABLE = "public.us_dc_sample_geocoded"
|
||||
POINT_TABLE = "public.master_data_centers"
|
||||
|
||||
|
||||
def connect():
|
||||
@@ -26,15 +26,17 @@ def load_points(conn):
|
||||
cur.execute(
|
||||
f"""
|
||||
select
|
||||
id,
|
||||
coalesce(provider, '') as provider,
|
||||
coalesce(facility_name, '') as facility_name,
|
||||
master_id,
|
||||
source,
|
||||
coalesce(operator, '') as operator,
|
||||
coalesce(name, '') as name,
|
||||
coalesce(city, '') as city,
|
||||
coalesce(state_code, '') as state_code,
|
||||
coalesce(state, '') as state,
|
||||
longitude,
|
||||
latitude,
|
||||
coalesce(geocode_source, '') as geocode_source,
|
||||
coalesce(geocode_precision, '') as geocode_precision,
|
||||
coalesce(curated_id, '') as curated_id,
|
||||
coalesce(osm_id, '') as osm_id,
|
||||
coalesce(match_method, '') as match_method,
|
||||
coalesce(geoid, '') as geoid
|
||||
from {POINT_TABLE}
|
||||
where longitude is not null and latitude is not null
|
||||
@@ -47,15 +49,17 @@ def load_points(conn):
|
||||
points.append(
|
||||
{
|
||||
"id": row[0],
|
||||
"provider": row[1],
|
||||
"facility_name": row[2],
|
||||
"city": row[3],
|
||||
"state_code": row[4],
|
||||
"lon": float(row[5]),
|
||||
"lat": float(row[6]),
|
||||
"geocode_source": row[7],
|
||||
"geocode_precision": row[8],
|
||||
"geoid": row[9],
|
||||
"source": row[1],
|
||||
"operator": row[2],
|
||||
"name": row[3],
|
||||
"city": row[4],
|
||||
"state": row[5],
|
||||
"lon": float(row[6]),
|
||||
"lat": float(row[7]),
|
||||
"curated_id": row[8],
|
||||
"osm_id": row[9],
|
||||
"match_method": row[10],
|
||||
"geoid": row[11],
|
||||
}
|
||||
)
|
||||
return points
|
||||
@@ -70,12 +74,12 @@ def compute_center(points):
|
||||
|
||||
|
||||
def build_stats(points):
|
||||
by_source = Counter(p["geocode_source"] or "(blank)" for p in points)
|
||||
by_precision = Counter(p["geocode_precision"] or "(blank)" for p in points)
|
||||
by_source = Counter(p["source"] or "(blank)" for p in points)
|
||||
by_match = Counter(p["match_method"] or "(none)" for p in points)
|
||||
return {
|
||||
"total": len(points),
|
||||
"by_source": dict(sorted(by_source.items(), key=lambda x: x[0])),
|
||||
"by_precision": dict(sorted(by_precision.items(), key=lambda x: x[0])),
|
||||
"by_match_method": dict(sorted(by_match.items(), key=lambda x: x[0])),
|
||||
}
|
||||
|
||||
|
||||
@@ -89,7 +93,7 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
<head>
|
||||
<meta charset=\"utf-8\" />
|
||||
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
|
||||
<title>US Data Centers Map</title>
|
||||
<title>US Data Centers Master Map</title>
|
||||
<link rel=\"stylesheet\" href=\"https://unpkg.com/leaflet@1.9.4/dist/leaflet.css\" />
|
||||
<style>
|
||||
html, body {{ height: 100%; margin: 0; font-family: system-ui, -apple-system, Segoe UI, sans-serif; }}
|
||||
@@ -109,17 +113,17 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
<body>
|
||||
<div id=\"layout\">
|
||||
<div id=\"panel\">
|
||||
<h1>US Data Centers</h1>
|
||||
<h1>US Data Centers (Master)</h1>
|
||||
<div class=\"stat-row\"><span>Total points</span><strong id=\"total\"></strong></div>
|
||||
<h2>Geocode Source</h2>
|
||||
<h2>Source</h2>
|
||||
<div id=\"sourceStats\"></div>
|
||||
<h2>Geocode Precision</h2>
|
||||
<div id=\"precisionStats\"></div>
|
||||
<h2>Match Method (merged rows)</h2>
|
||||
<div id=\"matchStats\"></div>
|
||||
<h2>Source Colors</h2>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>IM3_Existing_DataCenters</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>US Census Geocoder</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>Nominatim/OpenStreetMap</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>Other/Blank</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>merged (curated + OSM)</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>curated only</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>osm only</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>other</span></div>
|
||||
</div>
|
||||
<div id=\"map\"></div>
|
||||
</div>
|
||||
@@ -130,9 +134,9 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
const stats = {stats_json};
|
||||
|
||||
function colorForSource(source) {{
|
||||
if (source === 'IM3_Existing_DataCenters') return '#1f77b4';
|
||||
if (source === 'US Census Geocoder') return '#2ca02c';
|
||||
if (source === 'Nominatim/OpenStreetMap') return '#ff7f0e';
|
||||
if (source === 'merged') return '#2ca02c';
|
||||
if (source === 'curated') return '#1f77b4';
|
||||
if (source === 'osm') return '#ff7f0e';
|
||||
return '#7f7f7f';
|
||||
}}
|
||||
|
||||
@@ -156,22 +160,26 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
for (const p of points) {{
|
||||
const marker = L.circleMarker([p.lat, p.lon], {{
|
||||
radius: 4,
|
||||
color: colorForSource(p.geocode_source),
|
||||
fillColor: colorForSource(p.geocode_source),
|
||||
color: colorForSource(p.source),
|
||||
fillColor: colorForSource(p.source),
|
||||
fillOpacity: 0.7,
|
||||
weight: 1
|
||||
}});
|
||||
|
||||
const title = p.facility_name || p.id;
|
||||
const provider = p.provider || '(unknown provider)';
|
||||
const cityState = [p.city, p.state_code].filter(Boolean).join(', ');
|
||||
const title = p.name || p.id;
|
||||
const operator = p.operator || '(unknown operator)';
|
||||
const cityState = [p.city, p.state].filter(Boolean).join(', ');
|
||||
const provenance = [
|
||||
p.curated_id ? 'curated_id=' + escapeHtml(p.curated_id) : null,
|
||||
p.osm_id ? 'osm_id=' + escapeHtml(p.osm_id) : null,
|
||||
p.match_method ? 'match=' + escapeHtml(p.match_method) : null,
|
||||
].filter(Boolean).join('<br>');
|
||||
marker.bindPopup(`
|
||||
<strong>${{escapeHtml(title)}}</strong><br>
|
||||
Provider: ${{escapeHtml(provider)}}<br>
|
||||
ID: ${{escapeHtml(p.id)}}<br>
|
||||
Operator: ${{escapeHtml(operator)}}<br>
|
||||
Location: ${{escapeHtml(cityState)}}<br>
|
||||
Source: ${{escapeHtml(p.geocode_source)}}<br>
|
||||
Precision: ${{escapeHtml(p.geocode_precision)}}<br>
|
||||
Source: ${{escapeHtml(p.source)}}<br>
|
||||
${{provenance ? provenance + '<br>' : ''}}
|
||||
GEOID: ${{escapeHtml(p.geoid)}}
|
||||
`);
|
||||
|
||||
@@ -193,12 +201,12 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
sourceStats.appendChild(div);
|
||||
}}
|
||||
|
||||
const precisionStats = document.getElementById('precisionStats');
|
||||
for (const [k, v] of Object.entries(stats.by_precision)) {{
|
||||
const matchStats = document.getElementById('matchStats');
|
||||
for (const [k, v] of Object.entries(stats.by_match_method)) {{
|
||||
const div = document.createElement('div');
|
||||
div.className = 'stat-row';
|
||||
div.innerHTML = `<span>${{escapeHtml(k)}}</span><strong>${{v}}</strong>`;
|
||||
precisionStats.appendChild(div);
|
||||
matchStats.appendChild(div);
|
||||
}}
|
||||
</script>
|
||||
</body>
|
||||
|
||||
@@ -10,7 +10,7 @@ import psycopg2
|
||||
|
||||
|
||||
DB_NAME = "data_centers"
|
||||
DC_TABLE = "public.us_dc_sample_geocoded"
|
||||
DC_TABLE = "public.master_data_centers"
|
||||
CABLES_TABLE = "public.internet_cables"
|
||||
CITY_TABLE = "public.internet_city_dominance"
|
||||
|
||||
@@ -30,14 +30,14 @@ def load_data_centers(conn):
|
||||
cur.execute(
|
||||
f"""
|
||||
select
|
||||
id,
|
||||
coalesce(provider, ''),
|
||||
coalesce(facility_name, ''),
|
||||
master_id,
|
||||
source,
|
||||
coalesce(operator, ''),
|
||||
coalesce(name, ''),
|
||||
coalesce(city, ''),
|
||||
coalesce(state_code, ''),
|
||||
coalesce(state, ''),
|
||||
longitude,
|
||||
latitude,
|
||||
coalesce(geocode_source, '')
|
||||
latitude
|
||||
from {DC_TABLE}
|
||||
where longitude is not null and latitude is not null
|
||||
"""
|
||||
@@ -45,13 +45,13 @@ def load_data_centers(conn):
|
||||
return [
|
||||
{
|
||||
"id": r[0],
|
||||
"provider": r[1],
|
||||
"facility_name": r[2],
|
||||
"city": r[3],
|
||||
"state_code": r[4],
|
||||
"lon": float(r[5]),
|
||||
"lat": float(r[6]),
|
||||
"geocode_source": r[7],
|
||||
"source": r[1],
|
||||
"operator": r[2],
|
||||
"name": r[3],
|
||||
"city": r[4],
|
||||
"state": r[5],
|
||||
"lon": float(r[6]),
|
||||
"lat": float(r[7]),
|
||||
}
|
||||
for r in cur.fetchall()
|
||||
]
|
||||
@@ -181,10 +181,10 @@ def render_html(data_centers, cables_geojson, cities, output_path):
|
||||
<label class="toggle"><input type="checkbox" id="tCities" checked> City dominance</label>
|
||||
|
||||
<h2>Data center source</h2>
|
||||
<div class="row"><span><span class="swatch" style="background:#1f77b4"></span>IM3_Existing_DataCenters</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#2ca02c"></span>US Census Geocoder</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#ff7f0e"></span>Nominatim/OpenStreetMap</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#7f7f7f"></span>Other</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#2ca02c"></span>merged (curated + OSM)</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#1f77b4"></span>curated only</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#ff7f0e"></span>osm only</span></div>
|
||||
<div class="row"><span><span class="swatch" style="background:#7f7f7f"></span>other</span></div>
|
||||
|
||||
<h2>City dominance</h2>
|
||||
<div class="row"><span><span class="swatch" style="background:#9b59b6;border-radius:50%"></span>Sized by physical Tbps</span></div>
|
||||
@@ -197,9 +197,9 @@ def render_html(data_centers, cables_geojson, cities, output_path):
|
||||
const DATA = __PAYLOAD__;
|
||||
|
||||
function colorForSource(source) {
|
||||
if (source === 'IM3_Existing_DataCenters') return '#1f77b4';
|
||||
if (source === 'US Census Geocoder') return '#2ca02c';
|
||||
if (source === 'Nominatim/OpenStreetMap') return '#ff7f0e';
|
||||
if (source === 'merged') return '#2ca02c';
|
||||
if (source === 'curated') return '#1f77b4';
|
||||
if (source === 'osm') return '#ff7f0e';
|
||||
return '#7f7f7f';
|
||||
}
|
||||
|
||||
@@ -262,19 +262,19 @@ def render_html(data_centers, cables_geojson, cities, output_path):
|
||||
for (const p of DATA.data_centers) {
|
||||
const m = L.circleMarker([p.lat, p.lon], {
|
||||
radius: 3,
|
||||
color: colorForSource(p.geocode_source),
|
||||
fillColor: colorForSource(p.geocode_source),
|
||||
color: colorForSource(p.source),
|
||||
fillColor: colorForSource(p.source),
|
||||
fillOpacity: 0.85,
|
||||
weight: 0.8,
|
||||
});
|
||||
const title = p.facility_name || p.id;
|
||||
const provider = p.provider || '(unknown provider)';
|
||||
const cityState = [p.city, p.state_code].filter(Boolean).join(', ');
|
||||
const title = p.name || p.id;
|
||||
const operator = p.operator || '(unknown operator)';
|
||||
const cityState = [p.city, p.state].filter(Boolean).join(', ');
|
||||
m.bindPopup(`
|
||||
<strong>${esc(title)}</strong><br>
|
||||
Provider: ${esc(provider)}<br>
|
||||
Operator: ${esc(operator)}<br>
|
||||
Location: ${esc(cityState)}<br>
|
||||
Source: ${esc(p.geocode_source)}
|
||||
Source: ${esc(p.source)}
|
||||
`);
|
||||
dcLayer.addLayer(m);
|
||||
dcBounds.push([p.lat, p.lon]);
|
||||
|
||||
1
output/overpass_building_data_center.json
Normal file
1
output/overpass_building_data_center.json
Normal file
File diff suppressed because one or more lines are too long
1
output/overpass_telecom_data_center.json
Normal file
1
output/overpass_telecom_data_center.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user