Add master data center merge workflow
This commit is contained in:
@@ -8,7 +8,7 @@ import psycopg2
|
||||
|
||||
|
||||
DB_NAME = "data_centers"
|
||||
POINT_TABLE = "public.us_dc_sample_geocoded"
|
||||
POINT_TABLE = "public.master_data_centers"
|
||||
|
||||
|
||||
def connect():
|
||||
@@ -26,15 +26,17 @@ def load_points(conn):
|
||||
cur.execute(
|
||||
f"""
|
||||
select
|
||||
id,
|
||||
coalesce(provider, '') as provider,
|
||||
coalesce(facility_name, '') as facility_name,
|
||||
master_id,
|
||||
source,
|
||||
coalesce(operator, '') as operator,
|
||||
coalesce(name, '') as name,
|
||||
coalesce(city, '') as city,
|
||||
coalesce(state_code, '') as state_code,
|
||||
coalesce(state, '') as state,
|
||||
longitude,
|
||||
latitude,
|
||||
coalesce(geocode_source, '') as geocode_source,
|
||||
coalesce(geocode_precision, '') as geocode_precision,
|
||||
coalesce(curated_id, '') as curated_id,
|
||||
coalesce(osm_id, '') as osm_id,
|
||||
coalesce(match_method, '') as match_method,
|
||||
coalesce(geoid, '') as geoid
|
||||
from {POINT_TABLE}
|
||||
where longitude is not null and latitude is not null
|
||||
@@ -47,15 +49,17 @@ def load_points(conn):
|
||||
points.append(
|
||||
{
|
||||
"id": row[0],
|
||||
"provider": row[1],
|
||||
"facility_name": row[2],
|
||||
"city": row[3],
|
||||
"state_code": row[4],
|
||||
"lon": float(row[5]),
|
||||
"lat": float(row[6]),
|
||||
"geocode_source": row[7],
|
||||
"geocode_precision": row[8],
|
||||
"geoid": row[9],
|
||||
"source": row[1],
|
||||
"operator": row[2],
|
||||
"name": row[3],
|
||||
"city": row[4],
|
||||
"state": row[5],
|
||||
"lon": float(row[6]),
|
||||
"lat": float(row[7]),
|
||||
"curated_id": row[8],
|
||||
"osm_id": row[9],
|
||||
"match_method": row[10],
|
||||
"geoid": row[11],
|
||||
}
|
||||
)
|
||||
return points
|
||||
@@ -70,12 +74,12 @@ def compute_center(points):
|
||||
|
||||
|
||||
def build_stats(points):
|
||||
by_source = Counter(p["geocode_source"] or "(blank)" for p in points)
|
||||
by_precision = Counter(p["geocode_precision"] or "(blank)" for p in points)
|
||||
by_source = Counter(p["source"] or "(blank)" for p in points)
|
||||
by_match = Counter(p["match_method"] or "(none)" for p in points)
|
||||
return {
|
||||
"total": len(points),
|
||||
"by_source": dict(sorted(by_source.items(), key=lambda x: x[0])),
|
||||
"by_precision": dict(sorted(by_precision.items(), key=lambda x: x[0])),
|
||||
"by_match_method": dict(sorted(by_match.items(), key=lambda x: x[0])),
|
||||
}
|
||||
|
||||
|
||||
@@ -89,7 +93,7 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
<head>
|
||||
<meta charset=\"utf-8\" />
|
||||
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
|
||||
<title>US Data Centers Map</title>
|
||||
<title>US Data Centers Master Map</title>
|
||||
<link rel=\"stylesheet\" href=\"https://unpkg.com/leaflet@1.9.4/dist/leaflet.css\" />
|
||||
<style>
|
||||
html, body {{ height: 100%; margin: 0; font-family: system-ui, -apple-system, Segoe UI, sans-serif; }}
|
||||
@@ -109,17 +113,17 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
<body>
|
||||
<div id=\"layout\">
|
||||
<div id=\"panel\">
|
||||
<h1>US Data Centers</h1>
|
||||
<h1>US Data Centers (Master)</h1>
|
||||
<div class=\"stat-row\"><span>Total points</span><strong id=\"total\"></strong></div>
|
||||
<h2>Geocode Source</h2>
|
||||
<h2>Source</h2>
|
||||
<div id=\"sourceStats\"></div>
|
||||
<h2>Geocode Precision</h2>
|
||||
<div id=\"precisionStats\"></div>
|
||||
<h2>Match Method (merged rows)</h2>
|
||||
<div id=\"matchStats\"></div>
|
||||
<h2>Source Colors</h2>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>IM3_Existing_DataCenters</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>US Census Geocoder</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>Nominatim/OpenStreetMap</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>Other/Blank</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#2ca02c\"></span>merged (curated + OSM)</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#1f77b4\"></span>curated only</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#ff7f0e\"></span>osm only</span></div>
|
||||
<div class=\"stat-row\"><span><span class=\"dot\" style=\"background:#7f7f7f\"></span>other</span></div>
|
||||
</div>
|
||||
<div id=\"map\"></div>
|
||||
</div>
|
||||
@@ -130,9 +134,9 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
const stats = {stats_json};
|
||||
|
||||
function colorForSource(source) {{
|
||||
if (source === 'IM3_Existing_DataCenters') return '#1f77b4';
|
||||
if (source === 'US Census Geocoder') return '#2ca02c';
|
||||
if (source === 'Nominatim/OpenStreetMap') return '#ff7f0e';
|
||||
if (source === 'merged') return '#2ca02c';
|
||||
if (source === 'curated') return '#1f77b4';
|
||||
if (source === 'osm') return '#ff7f0e';
|
||||
return '#7f7f7f';
|
||||
}}
|
||||
|
||||
@@ -156,22 +160,26 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
for (const p of points) {{
|
||||
const marker = L.circleMarker([p.lat, p.lon], {{
|
||||
radius: 4,
|
||||
color: colorForSource(p.geocode_source),
|
||||
fillColor: colorForSource(p.geocode_source),
|
||||
color: colorForSource(p.source),
|
||||
fillColor: colorForSource(p.source),
|
||||
fillOpacity: 0.7,
|
||||
weight: 1
|
||||
}});
|
||||
|
||||
const title = p.facility_name || p.id;
|
||||
const provider = p.provider || '(unknown provider)';
|
||||
const cityState = [p.city, p.state_code].filter(Boolean).join(', ');
|
||||
const title = p.name || p.id;
|
||||
const operator = p.operator || '(unknown operator)';
|
||||
const cityState = [p.city, p.state].filter(Boolean).join(', ');
|
||||
const provenance = [
|
||||
p.curated_id ? 'curated_id=' + escapeHtml(p.curated_id) : null,
|
||||
p.osm_id ? 'osm_id=' + escapeHtml(p.osm_id) : null,
|
||||
p.match_method ? 'match=' + escapeHtml(p.match_method) : null,
|
||||
].filter(Boolean).join('<br>');
|
||||
marker.bindPopup(`
|
||||
<strong>${{escapeHtml(title)}}</strong><br>
|
||||
Provider: ${{escapeHtml(provider)}}<br>
|
||||
ID: ${{escapeHtml(p.id)}}<br>
|
||||
Operator: ${{escapeHtml(operator)}}<br>
|
||||
Location: ${{escapeHtml(cityState)}}<br>
|
||||
Source: ${{escapeHtml(p.geocode_source)}}<br>
|
||||
Precision: ${{escapeHtml(p.geocode_precision)}}<br>
|
||||
Source: ${{escapeHtml(p.source)}}<br>
|
||||
${{provenance ? provenance + '<br>' : ''}}
|
||||
GEOID: ${{escapeHtml(p.geoid)}}
|
||||
`);
|
||||
|
||||
@@ -193,12 +201,12 @@ def render_html(points, center_lat, center_lon, output_path):
|
||||
sourceStats.appendChild(div);
|
||||
}}
|
||||
|
||||
const precisionStats = document.getElementById('precisionStats');
|
||||
for (const [k, v] of Object.entries(stats.by_precision)) {{
|
||||
const matchStats = document.getElementById('matchStats');
|
||||
for (const [k, v] of Object.entries(stats.by_match_method)) {{
|
||||
const div = document.createElement('div');
|
||||
div.className = 'stat-row';
|
||||
div.innerHTML = `<span>${{escapeHtml(k)}}</span><strong>${{v}}</strong>`;
|
||||
precisionStats.appendChild(div);
|
||||
matchStats.appendChild(div);
|
||||
}}
|
||||
</script>
|
||||
</body>
|
||||
|
||||
Reference in New Issue
Block a user