Add master data center merge workflow

This commit is contained in:
2026-05-17 18:53:16 -07:00
parent 8fcbb18e37
commit 90e8b21423
10 changed files with 11892 additions and 9599 deletions

View File

@@ -8,7 +8,7 @@ import psycopg2
DB_NAME = "data_centers"
POINT_TABLE = "public.us_dc_sample_geocoded"
POINT_TABLE = "public.master_data_centers"
def connect():
@@ -26,15 +26,17 @@ def load_points(conn):
cur.execute(
f"""
select
id,
coalesce(provider, '') as provider,
coalesce(facility_name, '') as facility_name,
master_id,
source,
coalesce(operator, '') as operator,
coalesce(name, '') as name,
coalesce(city, '') as city,
coalesce(state_code, '') as state_code,
coalesce(state, '') as state,
longitude,
latitude,
coalesce(geocode_source, '') as geocode_source,
coalesce(geocode_precision, '') as geocode_precision,
coalesce(curated_id, '') as curated_id,
coalesce(osm_id, '') as osm_id,
coalesce(match_method, '') as match_method,
coalesce(geoid, '') as geoid
from {POINT_TABLE}
where longitude is not null and latitude is not null
@@ -47,15 +49,17 @@ def load_points(conn):
points.append(
{
"id": row[0],
"provider": row[1],
"facility_name": row[2],
"city": row[3],
"state_code": row[4],
"lon": float(row[5]),
"lat": float(row[6]),
"geocode_source": row[7],
"geocode_precision": row[8],
"geoid": row[9],
"source": row[1],
"operator": row[2],
"name": row[3],
"city": row[4],
"state": row[5],
"lon": float(row[6]),
"lat": float(row[7]),
"curated_id": row[8],
"osm_id": row[9],
"match_method": row[10],
"geoid": row[11],
}
)
return points
@@ -70,12 +74,12 @@ def compute_center(points):
def build_stats(points):
by_source = Counter(p["geocode_source"] or "(blank)" for p in points)
by_precision = Counter(p["geocode_precision"] or "(blank)" for p in points)
by_source = Counter(p["source"] or "(blank)" for p in points)
by_match = Counter(p["match_method"] or "(none)" for p in points)
return {
"total": len(points),
"by_source": dict(sorted(by_source.items(), key=lambda x: x[0])),
"by_precision": dict(sorted(by_precision.items(), key=lambda x: x[0])),
"by_match_method": dict(sorted(by_match.items(), key=lambda x: x[0])),
}
@@ -89,7 +93,7 @@ def render_html(points, center_lat, center_lon, output_path):
<head>
<meta charset=\"utf-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
<title>US Data Centers Map</title>
<title>US Data Centers Master Map</title>
<link rel=\"stylesheet\" href=\"https://unpkg.com/leaflet@1.9.4/dist/leaflet.css\" />
<style>
html, body {{ height: 100%; margin: 0; font-family: system-ui, -apple-system, Segoe UI, sans-serif; }}
@@ -109,17 +113,17 @@ def render_html(points, center_lat, center_lon, output_path):
<body>
<div id=\"layout\">
<div id=\"panel\">
<h1>US Data Centers</h1>
<h1>US Data Centers (Master)</h1>
<div class=\"stat-row\"><span>Total points</span><strong id=\"total\"></strong></div>
<h2>Geocode Source</h2>
<h2>Source</h2>
<div id=\"sourceStats\"></div>
<h2>Geocode Precision</h2>
<div id=\"precisionStats\"></div>
<h2>Match Method (merged rows)</h2>
<div id=\"matchStats\"></div>
<h2>Source Colors</h2>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>IM3_Existing_DataCenters</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>US Census Geocoder</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>Nominatim/OpenStreetMap</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>Other/Blank</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>merged (curated + OSM)</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>curated only</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>osm only</span></div>
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>other</span></div>
</div>
<div id=\"map\"></div>
</div>
@@ -130,9 +134,9 @@ def render_html(points, center_lat, center_lon, output_path):
const stats = {stats_json};
function colorForSource(source) {{
if (source === 'IM3_Existing_DataCenters') return '#1f77b4';
if (source === 'US Census Geocoder') return '#2ca02c';
if (source === 'Nominatim/OpenStreetMap') return '#ff7f0e';
if (source === 'merged') return '#2ca02c';
if (source === 'curated') return '#1f77b4';
if (source === 'osm') return '#ff7f0e';
return '#7f7f7f';
}}
@@ -156,22 +160,26 @@ def render_html(points, center_lat, center_lon, output_path):
for (const p of points) {{
const marker = L.circleMarker([p.lat, p.lon], {{
radius: 4,
color: colorForSource(p.geocode_source),
fillColor: colorForSource(p.geocode_source),
color: colorForSource(p.source),
fillColor: colorForSource(p.source),
fillOpacity: 0.7,
weight: 1
}});
const title = p.facility_name || p.id;
const provider = p.provider || '(unknown provider)';
const cityState = [p.city, p.state_code].filter(Boolean).join(', ');
const title = p.name || p.id;
const operator = p.operator || '(unknown operator)';
const cityState = [p.city, p.state].filter(Boolean).join(', ');
const provenance = [
p.curated_id ? 'curated_id=' + escapeHtml(p.curated_id) : null,
p.osm_id ? 'osm_id=' + escapeHtml(p.osm_id) : null,
p.match_method ? 'match=' + escapeHtml(p.match_method) : null,
].filter(Boolean).join('<br>');
marker.bindPopup(`
<strong>${{escapeHtml(title)}}</strong><br>
Provider: ${{escapeHtml(provider)}}<br>
ID: ${{escapeHtml(p.id)}}<br>
Operator: ${{escapeHtml(operator)}}<br>
Location: ${{escapeHtml(cityState)}}<br>
Source: ${{escapeHtml(p.geocode_source)}}<br>
Precision: ${{escapeHtml(p.geocode_precision)}}<br>
Source: ${{escapeHtml(p.source)}}<br>
${{provenance ? provenance + '<br>' : ''}}
GEOID: ${{escapeHtml(p.geoid)}}
`);
@@ -193,12 +201,12 @@ def render_html(points, center_lat, center_lon, output_path):
sourceStats.appendChild(div);
}}
const precisionStats = document.getElementById('precisionStats');
for (const [k, v] of Object.entries(stats.by_precision)) {{
const matchStats = document.getElementById('matchStats');
for (const [k, v] of Object.entries(stats.by_match_method)) {{
const div = document.createElement('div');
div.className = 'stat-row';
div.innerHTML = `<span>${{escapeHtml(k)}}</span><strong>${{v}}</strong>`;
precisionStats.appendChild(div);
matchStats.appendChild(div);
}}
</script>
</body>