climate and voeter
This commit is contained in:
@@ -5,4 +5,5 @@ __pycache__/
|
||||
_pycache__/
|
||||
new/
|
||||
internet_cables/
|
||||
.claude/
|
||||
.claude/
|
||||
.zip
|
||||
|
||||
12
.gitignore
vendored
12
.gitignore
vendored
@@ -77,4 +77,14 @@ Thumbs.db
|
||||
|
||||
# Data csv, json, etc.
|
||||
new/
|
||||
internet_cables/
|
||||
internet_cables/
|
||||
data/
|
||||
|
||||
# Compressed files
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.tar.bz2
|
||||
*.tar.xz
|
||||
*.gz
|
||||
*.bz2
|
||||
*.xz
|
||||
File diff suppressed because it is too large
Load Diff
854
open_meteo_historical_data_centers.ipynb
Normal file
854
open_meteo_historical_data_centers.ipynb
Normal file
@@ -0,0 +1,854 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Historical Climate for Master Data Centers (Daymet + gridMET)\n",
|
||||
"\n",
|
||||
"This notebook builds or refreshes `public.data_center_historical_climate`, a one-row-per-data-center climate summary keyed by `master_id`.\n",
|
||||
"\n",
|
||||
"**Source switch on 2026-05-20.** Previously this notebook used Open-Meteo's archive API, but a 30-year × 9-variable request consumes ~7,000 of the free 10,000 daily call budget, limiting the run to ~1 site/day. Two free, generous federal sources replace it:\n",
|
||||
"\n",
|
||||
"- **Daymet v4 R1** (ORNL DAAC): daily 1 km gridded `tmin`, `tmax`, `prcp`, `vp` (vapor pressure) for North America, 1980–present. Pulled via the single-pixel REST API.\n",
|
||||
"- **gridMET / METDATA** (U. Idaho): daily 4 km gridded daily-mean wind speed (`vs`) for CONUS, 1979–present. Pulled via THREDDS NetCDF Subset Service as CSV (no `xarray` dependency).\n",
|
||||
"\n",
|
||||
"Climate normal period: **1991–2020** (standard WMO baseline).\n",
|
||||
"\n",
|
||||
"| Metric | Field(s) | Source / derivation |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| Mean annual / summer temperature | `mean_annual_temperature_c`, `mean_summer_temperature_c` | Daymet `(tmin + tmax) / 2` |\n",
|
||||
"| Max / min daily temperature | `max_daily_temperature_c`, `min_daily_temperature_c` | Daymet `tmax`, `tmin` |\n",
|
||||
"| Diurnal temperature range | `mean_diurnal_temperature_range_c` | Daymet `tmax − tmin` |\n",
|
||||
"| Relative humidity | `mean_relative_humidity_pct` | Derived `100 × vp / e_sat(T)` via Magnus formula |\n",
|
||||
"| Wet bulb temperature | `mean_wet_bulb_temperature_c`, `max_wet_bulb_temperature_c`, `extreme_wet_bulb_days` | Stull (2011) approximation from T + derived RH |\n",
|
||||
"| Cooling degree days | `cooling_degree_days_c`, `annual_cooling_degree_days_c_mean` | Σ max(0, T_mean − 18.3 °C) |\n",
|
||||
"| Extreme heat days | `extreme_heat_days`, `annual_extreme_heat_days_mean` | Count days with `tmax ≥ 35 °C` |\n",
|
||||
"| Precipitation | `precipitation_total_mm`, `annual_precipitation_mm_mean`, `annual_precipitation_cv`, `wet_day_precipitation_p95_mm` | Daymet `prcp` aggregated |\n",
|
||||
"| Wind | `mean_wind_speed_ms`, `max_daily_mean_wind_speed_ms`, `sustained_wind_days`, `annual_sustained_wind_days_mean` | gridMET `vs` (daily-mean wind). **Sustained-wind day = daily mean ≥ 8 m/s** — this is *not* the same as the old Open-Meteo gust threshold; see notes at end. |\n",
|
||||
"| Drought severity, wildfire smoke | `*_source_status` columns | External sources needed (USDM, gridMET PDSI, NOAA HMS) |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import math\n",
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"import urllib.error\n",
|
||||
"import urllib.parse\n",
|
||||
"import urllib.request\n",
|
||||
"from io import StringIO\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import psycopg2\n",
|
||||
"from psycopg2 import sql\n",
|
||||
"from psycopg2.extras import execute_values\n",
|
||||
"\n",
|
||||
"pd.set_option('display.max_columns', 100)\n",
|
||||
"pd.set_option('display.max_rows', 120)\n",
|
||||
"\n",
|
||||
"print('pandas:', pd.__version__)\n",
|
||||
"print('numpy:', np.__version__)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def load_env_file(env_path: str = '.env') -> None:\n",
|
||||
" p = Path(env_path)\n",
|
||||
" if not p.exists():\n",
|
||||
" print(f'No {env_path} file found in {Path.cwd()}')\n",
|
||||
" return\n",
|
||||
" loaded = 0\n",
|
||||
" for raw_line in p.read_text(encoding='utf-8').splitlines():\n",
|
||||
" line = raw_line.strip()\n",
|
||||
" if not line or line.startswith('#') or '=' not in line:\n",
|
||||
" continue\n",
|
||||
" key, value = line.split('=', 1)\n",
|
||||
" key = key.strip()\n",
|
||||
" value = value.strip().strip('\"').strip(\"'\")\n",
|
||||
" if key and key not in os.environ:\n",
|
||||
" os.environ[key] = value\n",
|
||||
" loaded += 1\n",
|
||||
" print(f'Loaded {loaded} env var(s) from {env_path}')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def require_env(keys):\n",
|
||||
" missing = [k for k in keys if not os.getenv(k)]\n",
|
||||
" if missing:\n",
|
||||
" raise EnvironmentError(\n",
|
||||
" 'Missing required env vars in notebook kernel: ' + ', '.join(missing) +\n",
|
||||
" '.\\nSet them in this notebook, or add them to a .env file in this folder.'\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"load_env_file('.env')\n",
|
||||
"required_keys = ['PGWEB_HOST', 'PGWEB_PORT', 'PGWEB_USER', 'PGWEB_PASSWORD']\n",
|
||||
"require_env(required_keys)\n",
|
||||
"\n",
|
||||
"DB_NAME = os.getenv('PGDATABASE', 'data_centers')\n",
|
||||
"MASTER_TABLE = 'public.master_data_centers'\n",
|
||||
"CLIMATE_TABLE = 'public.data_center_historical_climate'\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_conn():\n",
|
||||
" return psycopg2.connect(\n",
|
||||
" host=os.environ['PGWEB_HOST'],\n",
|
||||
" port=os.environ['PGWEB_PORT'],\n",
|
||||
" user=os.environ['PGWEB_USER'],\n",
|
||||
" password=os.environ['PGWEB_PASSWORD'],\n",
|
||||
" dbname='data_centers',\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with get_conn() as conn:\n",
|
||||
" with conn.cursor() as cur:\n",
|
||||
" cur.execute('select current_database(), current_user')\n",
|
||||
" db, usr = cur.fetchone()\n",
|
||||
" print('Connected to DB:', db)\n",
|
||||
" print('As user:', usr)\n",
|
||||
" cur.execute('create extension if not exists postgis')\n",
|
||||
" cur.execute('select to_regclass(%s)', (MASTER_TABLE,))\n",
|
||||
" if cur.fetchone()[0] is None:\n",
|
||||
" raise RuntimeError(f'{MASTER_TABLE} does not exist. Run build_master_data_centers.py first.')\n",
|
||||
" cur.execute(sql.SQL('select count(*) from {}').format(sql.SQL(MASTER_TABLE)))\n",
|
||||
" print(f'{MASTER_TABLE} rows:', f'{cur.fetchone()[0]:,}')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Parameters\n",
|
||||
"\n",
|
||||
"Climate normal period defaults to **1991–2020**, matching the current WMO baseline. Both Daymet and gridMET cover this range fully.\n",
|
||||
"\n",
|
||||
"- `REFRESH_EXISTING=True` recomputes existing rows (e.g. after a threshold change).\n",
|
||||
"- `INCLUDE_WIND=False` skips the per-site gridMET pass; wind columns stay null and `wind_status='pending_gridmet'`. Useful for a fast first pass since Daymet alone is one HTTP call per site, while gridMET is ~30 (one per year).\n",
|
||||
"- `MAX_POINTS=3` for testing, `None` for all eligible sites.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ---- Daymet (T, precip, vapor pressure) ----\n",
|
||||
"DAYMET_API_URL = 'https://daymet.ornl.gov/single-pixel/api/data'\n",
|
||||
"DAYMET_VARIABLES = ['tmin', 'tmax', 'prcp', 'vp']\n",
|
||||
"DAYMET_DATASET_VERSION = 'daymet_v4r1'\n",
|
||||
"\n",
|
||||
"# ---- gridMET (daily-mean wind) ----\n",
|
||||
"GRIDMET_NCSS_URL_TEMPLATE = 'https://thredds.northwestknowledge.net/thredds/ncss/MET/vs/vs_{year}.nc'\n",
|
||||
"GRIDMET_VARIABLE = 'wind_speed'\n",
|
||||
"GRIDMET_SCALE_FACTOR = 0.1 # NCSS CSV returns packed short ints; multiply to get m/s\n",
|
||||
"GRIDMET_DATASET_VERSION = 'gridmet_vs'\n",
|
||||
"\n",
|
||||
"# ---- Period ----\n",
|
||||
"START_DATE = '1991-01-01'\n",
|
||||
"END_DATE = '2020-12-31'\n",
|
||||
"START_YEAR = int(START_DATE[:4])\n",
|
||||
"END_YEAR = int(END_DATE[:4])\n",
|
||||
"\n",
|
||||
"# ---- Run control ----\n",
|
||||
"REFRESH_EXISTING = False\n",
|
||||
"INCLUDE_WIND = True\n",
|
||||
"DRY_RUN = False\n",
|
||||
"MAX_POINTS = 3 # small int for testing; None for all eligible\n",
|
||||
"REQUEST_SLEEP_SECONDS = 0.5 # politeness pause between sites\n",
|
||||
"REQUEST_TIMEOUT_SECONDS = 90\n",
|
||||
"MAX_RETRIES = 5\n",
|
||||
"\n",
|
||||
"# ---- Metric thresholds ----\n",
|
||||
"CDD_BASE_C = 18.3\n",
|
||||
"EXTREME_HEAT_THRESHOLD_C = 35.0\n",
|
||||
"EXTREME_WET_BULB_THRESHOLD_C = 28.0\n",
|
||||
"SUSTAINED_WIND_THRESHOLD_MS = 8.0 # ~18 mph daily-mean wind. NOT a gust threshold.\n",
|
||||
"WET_DAY_THRESHOLD_MM = 1.0\n",
|
||||
"\n",
|
||||
"print(f'Period: {START_DATE} to {END_DATE}')\n",
|
||||
"print(f'Target table: {CLIMATE_TABLE}')\n",
|
||||
"print(f'Daymet vars: {DAYMET_VARIABLES}')\n",
|
||||
"print(f'Include wind: {INCLUDE_WIND} (gridMET sustained-wind threshold = {SUSTAINED_WIND_THRESHOLD_MS} m/s daily mean)')\n",
|
||||
"print(f'Refresh: {REFRESH_EXISTING}')\n",
|
||||
"print(f'Max sites: {MAX_POINTS}')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Target Table"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CREATE_CLIMATE_TABLE_SQL = f'''\n",
|
||||
"create table if not exists {CLIMATE_TABLE} (\n",
|
||||
" master_id text primary key references public.master_data_centers(master_id) on delete cascade,\n",
|
||||
" source text,\n",
|
||||
" name text,\n",
|
||||
" operator text,\n",
|
||||
" city text,\n",
|
||||
" state text,\n",
|
||||
" country text,\n",
|
||||
" longitude double precision not null,\n",
|
||||
" latitude double precision not null,\n",
|
||||
" geom geometry(Point, 4326),\n",
|
||||
"\n",
|
||||
" daymet_dataset_version text not null,\n",
|
||||
" gridmet_dataset_version text,\n",
|
||||
" climate_period_start date not null,\n",
|
||||
" climate_period_end date not null,\n",
|
||||
" daymet_tile integer,\n",
|
||||
" daymet_elevation_m double precision,\n",
|
||||
" daymet_grid_x_m double precision,\n",
|
||||
" daymet_grid_y_m double precision,\n",
|
||||
" gridmet_grid_latitude double precision,\n",
|
||||
" gridmet_grid_longitude double precision,\n",
|
||||
" observation_days integer not null,\n",
|
||||
" observation_years integer not null,\n",
|
||||
"\n",
|
||||
" -- temperature\n",
|
||||
" mean_annual_temperature_c double precision,\n",
|
||||
" mean_summer_temperature_c double precision,\n",
|
||||
" max_daily_temperature_c double precision,\n",
|
||||
" min_daily_temperature_c double precision,\n",
|
||||
" mean_diurnal_temperature_range_c double precision,\n",
|
||||
"\n",
|
||||
" -- humidity / wet-bulb (derived from Daymet T + vp)\n",
|
||||
" mean_relative_humidity_pct double precision,\n",
|
||||
" mean_wet_bulb_temperature_c double precision,\n",
|
||||
" max_wet_bulb_temperature_c double precision,\n",
|
||||
" extreme_wet_bulb_days integer,\n",
|
||||
"\n",
|
||||
" -- cooling load / heat extremes\n",
|
||||
" cooling_degree_days_c double precision,\n",
|
||||
" annual_cooling_degree_days_c_mean double precision,\n",
|
||||
" extreme_heat_days integer,\n",
|
||||
" annual_extreme_heat_days_mean double precision,\n",
|
||||
"\n",
|
||||
" -- precipitation\n",
|
||||
" precipitation_total_mm double precision,\n",
|
||||
" annual_precipitation_mm_mean double precision,\n",
|
||||
" annual_precipitation_cv double precision,\n",
|
||||
" wet_day_precipitation_p95_mm double precision,\n",
|
||||
"\n",
|
||||
" -- wind (gridMET daily-mean, NOT gust)\n",
|
||||
" mean_wind_speed_ms double precision,\n",
|
||||
" max_daily_mean_wind_speed_ms double precision,\n",
|
||||
" sustained_wind_days integer,\n",
|
||||
" annual_sustained_wind_days_mean double precision,\n",
|
||||
" wind_status text not null default 'pending_gridmet',\n",
|
||||
"\n",
|
||||
" -- external-source placeholders\n",
|
||||
" drought_severity_index_source_status text not null default 'needs_external_source',\n",
|
||||
" wildfire_smoke_exposure_source_status text not null default 'needs_external_source',\n",
|
||||
"\n",
|
||||
" -- provenance\n",
|
||||
" daymet_variables text[] not null,\n",
|
||||
" daymet_url text,\n",
|
||||
" fetched_at timestamptz not null default now(),\n",
|
||||
" updated_at timestamptz not null default now()\n",
|
||||
");\n",
|
||||
"\n",
|
||||
"create index if not exists data_center_hist_climate_geom_gix\n",
|
||||
" on {CLIMATE_TABLE} using gist (geom);\n",
|
||||
"create index if not exists data_center_hist_climate_state_idx\n",
|
||||
" on {CLIMATE_TABLE} (state);\n",
|
||||
"create index if not exists data_center_hist_climate_period_idx\n",
|
||||
" on {CLIMATE_TABLE} (climate_period_start, climate_period_end);\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"with get_conn() as conn:\n",
|
||||
" with conn.cursor() as cur:\n",
|
||||
" cur.execute(CREATE_CLIMATE_TABLE_SQL)\n",
|
||||
" cur.execute(sql.SQL('select count(*) from {}').format(sql.SQL(CLIMATE_TABLE)))\n",
|
||||
" existing = cur.fetchone()[0]\n",
|
||||
" print(f'{CLIMATE_TABLE} currently has {existing:,} row(s)')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Data Centers Needing Climate Metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def load_target_points(refresh_existing=False, max_points=None):\n",
|
||||
" exists_filter = ''\n",
|
||||
" params = []\n",
|
||||
" if not refresh_existing:\n",
|
||||
" exists_filter = f'''\n",
|
||||
" and not exists (\n",
|
||||
" select 1\n",
|
||||
" from {CLIMATE_TABLE} c\n",
|
||||
" where c.master_id = m.master_id\n",
|
||||
" and c.daymet_dataset_version = %s\n",
|
||||
" and c.climate_period_start = %s::date\n",
|
||||
" and c.climate_period_end = %s::date\n",
|
||||
" )\n",
|
||||
" '''\n",
|
||||
" params = [DAYMET_DATASET_VERSION, START_DATE, END_DATE]\n",
|
||||
"\n",
|
||||
" limit_sql = ''\n",
|
||||
" if max_points is not None:\n",
|
||||
" limit_sql = 'limit %s'\n",
|
||||
" params.append(int(max_points))\n",
|
||||
"\n",
|
||||
" query = f'''\n",
|
||||
" select\n",
|
||||
" m.master_id, m.source, m.name, m.operator,\n",
|
||||
" m.city, m.state, m.country,\n",
|
||||
" m.longitude, m.latitude\n",
|
||||
" from {MASTER_TABLE} m\n",
|
||||
" where m.longitude is not null\n",
|
||||
" and m.latitude is not null\n",
|
||||
" and m.geom is not null\n",
|
||||
" -- Daymet covers North America bounding box\n",
|
||||
" and m.longitude between -178 and -52\n",
|
||||
" and m.latitude between 14 and 83\n",
|
||||
" {exists_filter}\n",
|
||||
" order by m.master_id\n",
|
||||
" {limit_sql}\n",
|
||||
" '''\n",
|
||||
" with get_conn() as conn:\n",
|
||||
" return pd.read_sql_query(query, conn, params=params)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"targets = load_target_points(refresh_existing=REFRESH_EXISTING, max_points=MAX_POINTS)\n",
|
||||
"print(f'Target data centers to fetch: {len(targets):,}')\n",
|
||||
"display(targets.head())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fetchers and derived-metric helpers\n",
|
||||
"\n",
|
||||
"`fetch_daymet` issues one single-pixel REST call for the full 30-year period. `fetch_gridmet_wind` issues one NCSS CSV call per year (Daymet has no equivalent multi-year aggregated endpoint we can rely on; per-year fetches keep each response small).\n",
|
||||
"\n",
|
||||
"Wet bulb is derived in two steps:\n",
|
||||
"1. **RH from Daymet vp** using the Magnus saturation-vapor-pressure formula.\n",
|
||||
"2. **Wet bulb from T + RH** using the Stull (2011) approximation (valid 5 % ≤ RH ≤ 99 %, −20 °C ≤ T ≤ +50 °C).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def http_get(url, max_retries=MAX_RETRIES, timeout=REQUEST_TIMEOUT_SECONDS):\n",
|
||||
" last_error = None\n",
|
||||
" for attempt in range(1, max_retries + 1):\n",
|
||||
" try:\n",
|
||||
" req = urllib.request.Request(url, headers={'User-Agent': 'data-center-climate-notebook/1.1'})\n",
|
||||
" with urllib.request.urlopen(req, timeout=timeout) as resp:\n",
|
||||
" return resp.read().decode('utf-8')\n",
|
||||
" except urllib.error.HTTPError as exc:\n",
|
||||
" last_error = exc\n",
|
||||
" try:\n",
|
||||
" body = exc.read().decode('utf-8', errors='replace')[:200]\n",
|
||||
" except Exception:\n",
|
||||
" body = ''\n",
|
||||
" if exc.code in (400, 404):\n",
|
||||
" raise RuntimeError(f'HTTP {exc.code} on {url}: {body or exc.reason}')\n",
|
||||
" sleep_for = min(2 ** attempt, 60)\n",
|
||||
" print(f' HTTP {exc.code} attempt {attempt}/{max_retries}: {body}; sleeping {sleep_for}s')\n",
|
||||
" time.sleep(sleep_for)\n",
|
||||
" except (urllib.error.URLError, TimeoutError) as exc:\n",
|
||||
" last_error = exc\n",
|
||||
" sleep_for = min(2 ** attempt, 60)\n",
|
||||
" print(f' Network error attempt {attempt}/{max_retries}: {exc}; sleeping {sleep_for}s')\n",
|
||||
" time.sleep(sleep_for)\n",
|
||||
" raise RuntimeError(f'Request failed after {max_retries} attempt(s) for {url}: {last_error}')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def fetch_daymet(lat, lon):\n",
|
||||
" \"\"\"Return (header_dict, dataframe) for one point covering START_DATE..END_DATE.\"\"\"\n",
|
||||
" params = {\n",
|
||||
" 'lat': f'{float(lat):.6f}',\n",
|
||||
" 'lon': f'{float(lon):.6f}',\n",
|
||||
" 'vars': ','.join(DAYMET_VARIABLES),\n",
|
||||
" 'start': START_DATE,\n",
|
||||
" 'end': END_DATE,\n",
|
||||
" 'format': 'csv',\n",
|
||||
" }\n",
|
||||
" url = DAYMET_API_URL + '?' + urllib.parse.urlencode(params)\n",
|
||||
" text = http_get(url)\n",
|
||||
" # Daymet CSV preamble: Latitude / Longitude / X & Y / Tile / Elevation / \"All years; ...\" / cite line\n",
|
||||
" lines = text.splitlines()\n",
|
||||
" header = {'url': url}\n",
|
||||
" data_start = None\n",
|
||||
" for i, line in enumerate(lines):\n",
|
||||
" if line.strip().startswith('year,'):\n",
|
||||
" data_start = i\n",
|
||||
" break\n",
|
||||
" if ':' in line:\n",
|
||||
" k, v = line.split(':', 1)\n",
|
||||
" header[k.strip()] = v.strip()\n",
|
||||
" if data_start is None:\n",
|
||||
" raise RuntimeError(f'Daymet CSV missing column header. First 500 chars: {text[:500]}')\n",
|
||||
" df = pd.read_csv(StringIO('\\n'.join(lines[data_start:])))\n",
|
||||
" # Normalize column names: 'prcp (mm/day)' -> 'prcp', etc.\n",
|
||||
" df.columns = [c.split(' ')[0] for c in df.columns]\n",
|
||||
" # Daymet uses 365-day calendar (Dec 31 dropped on leap years), yday is 1..365\n",
|
||||
" df['date'] = pd.to_datetime(df['year'].astype(str), format='%Y') + pd.to_timedelta(df['yday'] - 1, unit='D')\n",
|
||||
" return header, df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def fetch_gridmet_wind(lat, lon):\n",
|
||||
" \"\"\"Return (meta_dict, dataframe[date, wind_speed_ms]) covering START_YEAR..END_YEAR.\n",
|
||||
"\n",
|
||||
" Issues one NCSS CSV request per year (~30 requests, ~3 KB each).\"\"\"\n",
|
||||
" frames = []\n",
|
||||
" grid_lat = grid_lon = None\n",
|
||||
" for year in range(START_YEAR, END_YEAR + 1):\n",
|
||||
" url = (GRIDMET_NCSS_URL_TEMPLATE.format(year=year) +\n",
|
||||
" f'?var={GRIDMET_VARIABLE}'\n",
|
||||
" f'&latitude={float(lat):.6f}&longitude={float(lon):.6f}'\n",
|
||||
" f'&time_start={year}-01-01T00:00:00Z&time_end={year}-12-31T23:59:59Z'\n",
|
||||
" f'&accept=csv')\n",
|
||||
" text = http_get(url)\n",
|
||||
" df = pd.read_csv(StringIO(text))\n",
|
||||
" # NCSS column names look like: time, latitude[unit=\"...\"], longitude[..], wind_speed[unit=\"m/s\"]\n",
|
||||
" df = df.rename(columns={c: c.split('[')[0].strip() for c in df.columns})\n",
|
||||
" df['date'] = pd.to_datetime(df['time']).dt.tz_localize(None).dt.normalize()\n",
|
||||
" # NCSS CSV does NOT apply scale_factor for packed shorts; apply here.\n",
|
||||
" df['wind_speed_ms'] = pd.to_numeric(df['wind_speed'], errors='coerce') * GRIDMET_SCALE_FACTOR\n",
|
||||
" if grid_lat is None and 'latitude' in df.columns:\n",
|
||||
" grid_lat = float(df['latitude'].iloc[0])\n",
|
||||
" grid_lon = float(df['longitude'].iloc[0])\n",
|
||||
" frames.append(df[['date', 'wind_speed_ms']])\n",
|
||||
" return {'grid_lat': grid_lat, 'grid_lon': grid_lon}, pd.concat(frames, ignore_index=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def saturation_vapor_pressure_pa(t_c):\n",
|
||||
" \"\"\"Magnus / Tetens. T in °C, returns saturation vapor pressure in Pa.\"\"\"\n",
|
||||
" return 611.2 * np.exp(17.67 * t_c / (t_c + 243.5))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def relative_humidity_pct(vp_pa, t_c):\n",
|
||||
" e_sat = saturation_vapor_pressure_pa(t_c)\n",
|
||||
" rh = 100.0 * vp_pa / e_sat\n",
|
||||
" return np.clip(rh, 0.0, 100.0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def wet_bulb_temperature_c(t_c, rh_pct):\n",
|
||||
" \"\"\"Stull (2011) approximation. Valid for 5 ≤ RH ≤ 99 %, −20 ≤ T ≤ +50 °C.\"\"\"\n",
|
||||
" rh = np.clip(rh_pct, 5.0, 99.0)\n",
|
||||
" return (\n",
|
||||
" t_c * np.arctan(0.151977 * np.sqrt(rh + 8.313659))\n",
|
||||
" + np.arctan(t_c + rh)\n",
|
||||
" - np.arctan(rh - 1.676331)\n",
|
||||
" + 0.00391838 * (rh ** 1.5) * np.arctan(0.023101 * rh)\n",
|
||||
" - 4.686035\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _parse_daymet_header(header):\n",
|
||||
" def f(s):\n",
|
||||
" try: return float(str(s).split()[0])\n",
|
||||
" except Exception: return None\n",
|
||||
" elevation_m = f(header.get('Elevation', '').replace('meters', '').strip())\n",
|
||||
" tile = None\n",
|
||||
" try: tile = int(header.get('Tile', '').strip())\n",
|
||||
" except Exception: pass\n",
|
||||
" xy = header.get('X & Y on Lambert Conformal Conic', '')\n",
|
||||
" daymet_x = daymet_y = None\n",
|
||||
" if xy:\n",
|
||||
" parts = xy.split()\n",
|
||||
" try:\n",
|
||||
" daymet_x = float(parts[0]); daymet_y = float(parts[1])\n",
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
" return tile, elevation_m, daymet_x, daymet_y\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def summarize_site(dc_row, daymet_header, daymet_df, wind_info):\n",
|
||||
" df = daymet_df.copy()\n",
|
||||
" df['tmean'] = (df['tmin'] + df['tmax']) / 2.0\n",
|
||||
" df['year'] = df['date'].dt.year\n",
|
||||
" df['month'] = df['date'].dt.month\n",
|
||||
"\n",
|
||||
" df['rh_mean_pct'] = relative_humidity_pct(df['vp'], df['tmean'])\n",
|
||||
" df['rh_at_tmax_pct'] = relative_humidity_pct(df['vp'], df['tmax'])\n",
|
||||
" df['wet_bulb_mean_c'] = wet_bulb_temperature_c(df['tmean'], df['rh_mean_pct'])\n",
|
||||
" df['wet_bulb_max_c'] = wet_bulb_temperature_c(df['tmax'], df['rh_at_tmax_pct'])\n",
|
||||
"\n",
|
||||
" df['cdd_c'] = (df['tmean'] - CDD_BASE_C).clip(lower=0)\n",
|
||||
" df['extreme_heat'] = df['tmax'] >= EXTREME_HEAT_THRESHOLD_C\n",
|
||||
" df['extreme_wet_bulb'] = df['wet_bulb_max_c'] >= EXTREME_WET_BULB_THRESHOLD_C\n",
|
||||
" df['dtr_c'] = df['tmax'] - df['tmin']\n",
|
||||
"\n",
|
||||
" annual = (df.groupby('year', as_index=False)\n",
|
||||
" .agg(annual_mean_t_c=('tmean','mean'),\n",
|
||||
" annual_cdd=('cdd_c','sum'),\n",
|
||||
" annual_extreme_heat=('extreme_heat','sum'),\n",
|
||||
" annual_prcp_mm=('prcp','sum')))\n",
|
||||
" summer = df.loc[df['month'].isin([6, 7, 8])]\n",
|
||||
" wet_days = df.loc[df['prcp'] >= WET_DAY_THRESHOLD_MM, 'prcp']\n",
|
||||
"\n",
|
||||
" annual_prcp = annual['annual_prcp_mm'].dropna()\n",
|
||||
" precip_cv = float(annual_prcp.std(ddof=1) / annual_prcp.mean()) if len(annual_prcp) > 1 and annual_prcp.mean() != 0 else np.nan\n",
|
||||
"\n",
|
||||
" tile, elevation_m, daymet_x, daymet_y = _parse_daymet_header(daymet_header)\n",
|
||||
"\n",
|
||||
" row = {\n",
|
||||
" 'master_id': dc_row.master_id,\n",
|
||||
" 'source': dc_row.source,\n",
|
||||
" 'name': dc_row.name,\n",
|
||||
" 'operator': dc_row.operator,\n",
|
||||
" 'city': dc_row.city,\n",
|
||||
" 'state': dc_row.state,\n",
|
||||
" 'country': dc_row.country,\n",
|
||||
" 'longitude': float(dc_row.longitude),\n",
|
||||
" 'latitude': float(dc_row.latitude),\n",
|
||||
"\n",
|
||||
" 'daymet_dataset_version': DAYMET_DATASET_VERSION,\n",
|
||||
" 'gridmet_dataset_version': GRIDMET_DATASET_VERSION if wind_info is not None else None,\n",
|
||||
" 'climate_period_start': START_DATE,\n",
|
||||
" 'climate_period_end': END_DATE,\n",
|
||||
" 'daymet_tile': tile,\n",
|
||||
" 'daymet_elevation_m': elevation_m,\n",
|
||||
" 'daymet_grid_x_m': daymet_x,\n",
|
||||
" 'daymet_grid_y_m': daymet_y,\n",
|
||||
" 'gridmet_grid_latitude': wind_info['grid_lat'] if wind_info is not None else None,\n",
|
||||
" 'gridmet_grid_longitude': wind_info['grid_lon'] if wind_info is not None else None,\n",
|
||||
" 'observation_days': int(len(df)),\n",
|
||||
" 'observation_years': int(df['year'].nunique()),\n",
|
||||
"\n",
|
||||
" 'mean_annual_temperature_c': float(annual['annual_mean_t_c'].mean()),\n",
|
||||
" 'mean_summer_temperature_c': float(summer['tmean'].mean()) if len(summer) else np.nan,\n",
|
||||
" 'max_daily_temperature_c': float(df['tmax'].max()),\n",
|
||||
" 'min_daily_temperature_c': float(df['tmin'].min()),\n",
|
||||
" 'mean_diurnal_temperature_range_c': float(df['dtr_c'].mean()),\n",
|
||||
"\n",
|
||||
" 'mean_relative_humidity_pct': float(df['rh_mean_pct'].mean()),\n",
|
||||
" 'mean_wet_bulb_temperature_c': float(df['wet_bulb_mean_c'].mean()),\n",
|
||||
" 'max_wet_bulb_temperature_c': float(df['wet_bulb_max_c'].max()),\n",
|
||||
" 'extreme_wet_bulb_days': int(df['extreme_wet_bulb'].sum()),\n",
|
||||
"\n",
|
||||
" 'cooling_degree_days_c': float(df['cdd_c'].sum()),\n",
|
||||
" 'annual_cooling_degree_days_c_mean': float(annual['annual_cdd'].mean()),\n",
|
||||
" 'extreme_heat_days': int(df['extreme_heat'].sum()),\n",
|
||||
" 'annual_extreme_heat_days_mean': float(annual['annual_extreme_heat'].mean()),\n",
|
||||
"\n",
|
||||
" 'precipitation_total_mm': float(df['prcp'].sum()),\n",
|
||||
" 'annual_precipitation_mm_mean': float(annual['annual_prcp_mm'].mean()),\n",
|
||||
" 'annual_precipitation_cv': precip_cv,\n",
|
||||
" 'wet_day_precipitation_p95_mm': float(wet_days.quantile(0.95)) if len(wet_days) else 0.0,\n",
|
||||
"\n",
|
||||
" 'mean_wind_speed_ms': None,\n",
|
||||
" 'max_daily_mean_wind_speed_ms': None,\n",
|
||||
" 'sustained_wind_days': None,\n",
|
||||
" 'annual_sustained_wind_days_mean': None,\n",
|
||||
" 'wind_status': 'pending_gridmet',\n",
|
||||
"\n",
|
||||
" 'drought_severity_index_source_status': 'needs_external_source',\n",
|
||||
" 'wildfire_smoke_exposure_source_status': 'needs_external_source',\n",
|
||||
"\n",
|
||||
" 'daymet_variables': DAYMET_VARIABLES,\n",
|
||||
" 'daymet_url': daymet_header.get('url'),\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" if wind_info is not None:\n",
|
||||
" wdf = wind_info['data'].copy()\n",
|
||||
" wdf['year'] = wdf['date'].dt.year\n",
|
||||
" wdf['sustained'] = wdf['wind_speed_ms'] >= SUSTAINED_WIND_THRESHOLD_MS\n",
|
||||
" annual_wind = wdf.groupby('year', as_index=False).agg(d=('sustained', 'sum'))\n",
|
||||
" row.update({\n",
|
||||
" 'mean_wind_speed_ms': float(wdf['wind_speed_ms'].mean()),\n",
|
||||
" 'max_daily_mean_wind_speed_ms': float(wdf['wind_speed_ms'].max()),\n",
|
||||
" 'sustained_wind_days': int(wdf['sustained'].sum()),\n",
|
||||
" 'annual_sustained_wind_days_mean': float(annual_wind['d'].mean()),\n",
|
||||
" 'wind_status': 'gridmet',\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" return row\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "11",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Upsert Helper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"UPSERT_COLUMNS = [\n",
|
||||
" 'master_id', 'source', 'name', 'operator', 'city', 'state', 'country',\n",
|
||||
" 'longitude', 'latitude',\n",
|
||||
" 'daymet_dataset_version', 'gridmet_dataset_version',\n",
|
||||
" 'climate_period_start', 'climate_period_end',\n",
|
||||
" 'daymet_tile', 'daymet_elevation_m', 'daymet_grid_x_m', 'daymet_grid_y_m',\n",
|
||||
" 'gridmet_grid_latitude', 'gridmet_grid_longitude',\n",
|
||||
" 'observation_days', 'observation_years',\n",
|
||||
" 'mean_annual_temperature_c', 'mean_summer_temperature_c',\n",
|
||||
" 'max_daily_temperature_c', 'min_daily_temperature_c',\n",
|
||||
" 'mean_diurnal_temperature_range_c',\n",
|
||||
" 'mean_relative_humidity_pct',\n",
|
||||
" 'mean_wet_bulb_temperature_c', 'max_wet_bulb_temperature_c',\n",
|
||||
" 'extreme_wet_bulb_days',\n",
|
||||
" 'cooling_degree_days_c', 'annual_cooling_degree_days_c_mean',\n",
|
||||
" 'extreme_heat_days', 'annual_extreme_heat_days_mean',\n",
|
||||
" 'precipitation_total_mm', 'annual_precipitation_mm_mean',\n",
|
||||
" 'annual_precipitation_cv', 'wet_day_precipitation_p95_mm',\n",
|
||||
" 'mean_wind_speed_ms', 'max_daily_mean_wind_speed_ms',\n",
|
||||
" 'sustained_wind_days', 'annual_sustained_wind_days_mean',\n",
|
||||
" 'wind_status',\n",
|
||||
" 'drought_severity_index_source_status',\n",
|
||||
" 'wildfire_smoke_exposure_source_status',\n",
|
||||
" 'daymet_variables', 'daymet_url',\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def clean_db_value(value):\n",
|
||||
" if isinstance(value, (list, tuple)):\n",
|
||||
" return list(value)\n",
|
||||
" if value is None:\n",
|
||||
" return None\n",
|
||||
" if isinstance(value, float) and np.isnan(value):\n",
|
||||
" return None\n",
|
||||
" try:\n",
|
||||
" if pd.isna(value):\n",
|
||||
" return None\n",
|
||||
" except (TypeError, ValueError):\n",
|
||||
" pass\n",
|
||||
" return value\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def upsert_climate_rows(rows):\n",
|
||||
" if not rows:\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" values = [\n",
|
||||
" tuple(clean_db_value(row.get(col)) for col in UPSERT_COLUMNS)\n",
|
||||
" for row in rows\n",
|
||||
" ]\n",
|
||||
" col_sql = sql.SQL(', ').join(map(sql.Identifier, UPSERT_COLUMNS))\n",
|
||||
" update_assignments = sql.SQL(', ').join(\n",
|
||||
" sql.SQL('{} = excluded.{}').format(sql.Identifier(col), sql.Identifier(col))\n",
|
||||
" for col in UPSERT_COLUMNS if col != 'master_id'\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" insert_sql = sql.SQL('''\n",
|
||||
" insert into {table} ({cols}, geom, updated_at)\n",
|
||||
" values %s\n",
|
||||
" on conflict (master_id) do update set\n",
|
||||
" {updates},\n",
|
||||
" geom = excluded.geom,\n",
|
||||
" fetched_at = now(),\n",
|
||||
" updated_at = now()\n",
|
||||
" ''').format(\n",
|
||||
" table=sql.SQL(CLIMATE_TABLE),\n",
|
||||
" cols=col_sql,\n",
|
||||
" updates=update_assignments,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" template = '(' + ', '.join(['%s'] * len(UPSERT_COLUMNS)) + ', ST_SetSRID(ST_MakePoint(%s, %s), 4326), now())'\n",
|
||||
" lon_i = UPSERT_COLUMNS.index('longitude')\n",
|
||||
" lat_i = UPSERT_COLUMNS.index('latitude')\n",
|
||||
" values_with_geom = [r + (r[lon_i], r[lat_i]) for r in values]\n",
|
||||
"\n",
|
||||
" with get_conn() as conn:\n",
|
||||
" with conn.cursor() as cur:\n",
|
||||
" execute_values(cur, insert_sql.as_string(conn), values_with_geom, template=template, page_size=500)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fetch + Upsert (per site, with checkpointing)\n",
|
||||
"\n",
|
||||
"Each site is fetched, summarized, **and upserted to Postgres immediately**. That way:\n",
|
||||
"\n",
|
||||
"- A mid-run crash never loses more than the current site.\n",
|
||||
"- Re-running with `REFRESH_EXISTING=False` skips everything already in `data_center_historical_climate` (filtered in `load_target_points`).\n",
|
||||
"- The CSV checkpoint at `output/historical_climate_checkpoint.csv` is purely a debug artifact; the DB is the source of truth.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if targets.empty:\n",
|
||||
" climate_rows = []\n",
|
||||
" print('No new target rows. Set REFRESH_EXISTING=True to recompute existing rows.')\n",
|
||||
"else:\n",
|
||||
" climate_rows = []\n",
|
||||
" errors = []\n",
|
||||
" total = len(targets)\n",
|
||||
" checkpoint_path = Path('output/historical_climate_checkpoint.csv')\n",
|
||||
" checkpoint_path.parent.mkdir(exist_ok=True)\n",
|
||||
"\n",
|
||||
" for i, dc_row in enumerate(targets.itertuples(index=False), start=1):\n",
|
||||
" try:\n",
|
||||
" daymet_header, daymet_df = fetch_daymet(dc_row.latitude, dc_row.longitude)\n",
|
||||
" wind_info = None\n",
|
||||
" if INCLUDE_WIND:\n",
|
||||
" grid_meta, wind_df = fetch_gridmet_wind(dc_row.latitude, dc_row.longitude)\n",
|
||||
" wind_info = {\n",
|
||||
" 'grid_lat': grid_meta['grid_lat'],\n",
|
||||
" 'grid_lon': grid_meta['grid_lon'],\n",
|
||||
" 'data': wind_df,\n",
|
||||
" }\n",
|
||||
" row = summarize_site(dc_row, daymet_header, daymet_df, wind_info)\n",
|
||||
" climate_rows.append(row)\n",
|
||||
"\n",
|
||||
" if not DRY_RUN:\n",
|
||||
" upsert_climate_rows([row])\n",
|
||||
" pd.DataFrame(climate_rows).to_csv(checkpoint_path, index=False)\n",
|
||||
"\n",
|
||||
" print(f'{i:>5,}/{total:,} {dc_row.master_id} '\n",
|
||||
" f'T_mean={row[\"mean_annual_temperature_c\"]:.2f}°C '\n",
|
||||
" f'CDD={row[\"cooling_degree_days_c\"]:,.0f} '\n",
|
||||
" f'wind={row[\"wind_status\"]} '\n",
|
||||
" f'successes={len(climate_rows):,} errors={len(errors):,}')\n",
|
||||
" except Exception as exc:\n",
|
||||
" errors.append({'master_id': dc_row.master_id, 'error': str(exc)})\n",
|
||||
" print(f'ERROR {i:>5,}/{total:,} ({dc_row.master_id}): {exc}')\n",
|
||||
"\n",
|
||||
" time.sleep(REQUEST_SLEEP_SECONDS)\n",
|
||||
"\n",
|
||||
" climate = pd.DataFrame(climate_rows)\n",
|
||||
" error_log = pd.DataFrame(errors)\n",
|
||||
" print(f'\\nDone: {len(climate):,} successes / {len(error_log):,} errors')\n",
|
||||
" if not climate.empty:\n",
|
||||
" display(climate.head())\n",
|
||||
" if not error_log.empty:\n",
|
||||
" display(error_log)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Inspect Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summary_query = f'''\n",
|
||||
"select\n",
|
||||
" count(*) as rows,\n",
|
||||
" min(climate_period_start) as min_period_start,\n",
|
||||
" max(climate_period_end) as max_period_end,\n",
|
||||
" count(*) filter (where wind_status = 'gridmet') as with_wind,\n",
|
||||
" count(*) filter (where wind_status = 'pending_gridmet') as wind_pending,\n",
|
||||
" count(*) filter (where drought_severity_index_source_status = 'needs_external_source') as drought_external_needed,\n",
|
||||
" count(*) filter (where wildfire_smoke_exposure_source_status = 'needs_external_source') as smoke_external_needed\n",
|
||||
"from {CLIMATE_TABLE}\n",
|
||||
"where daymet_dataset_version = %s\n",
|
||||
" and climate_period_start = %s::date\n",
|
||||
" and climate_period_end = %s::date\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"sample_query = f'''\n",
|
||||
"select\n",
|
||||
" c.master_id, c.name, c.state,\n",
|
||||
" round(c.mean_annual_temperature_c::numeric, 2) as mean_t_c,\n",
|
||||
" round(c.mean_summer_temperature_c::numeric, 2) as summer_t_c,\n",
|
||||
" round(c.mean_relative_humidity_pct::numeric, 1) as rh_pct,\n",
|
||||
" round(c.mean_wet_bulb_temperature_c::numeric, 2) as wb_mean_c,\n",
|
||||
" round(c.cooling_degree_days_c::numeric, 0) as cdd_total,\n",
|
||||
" c.extreme_heat_days,\n",
|
||||
" round(c.annual_precipitation_cv::numeric, 3) as precip_cv,\n",
|
||||
" round(c.mean_wind_speed_ms::numeric, 2) as wind_mean_ms,\n",
|
||||
" c.sustained_wind_days,\n",
|
||||
" c.wind_status\n",
|
||||
"from {CLIMATE_TABLE} c\n",
|
||||
"order by c.cooling_degree_days_c desc nulls last\n",
|
||||
"limit 20\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"with get_conn() as conn:\n",
|
||||
" summary = pd.read_sql_query(summary_query, conn, params=(DAYMET_DATASET_VERSION, START_DATE, END_DATE))\n",
|
||||
" sample = pd.read_sql_query(sample_query, conn)\n",
|
||||
"\n",
|
||||
"display(summary)\n",
|
||||
"display(sample)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Notes for the Next Data Sources\n",
|
||||
"\n",
|
||||
"Daymet (T/precip/VP at 1 km) + gridMET (wind at 4 km) cover the core weather/climate burden metrics for CONUS data centers. Two metrics still need better external sources:\n",
|
||||
"\n",
|
||||
"- **Drought severity index** — gridMET also publishes `pdsi` (Palmer Drought Severity Index) and `etr` (reference ET) at the same THREDDS server. A follow-up cell can pull PDSI percentiles using the same NCSS pattern as `vs` here. Alternatively, the US Drought Monitor (USDM) shapefiles give weekly drought categories that can be spatially joined to the DC point.\n",
|
||||
"- **Wildfire smoke exposure** — NOAA HMS smoke shapefiles (daily plume polygons, 2005–present) or EPA AirNow PM2.5 monitor data. Smoke-day counts would be directly comparable to the heat-day metric here.\n",
|
||||
"\n",
|
||||
"**Metric change vs. the prior Open-Meteo implementation.** The old `windstorm_days` field counted days where the daily *max wind gust* exceeded 17.2 m/s (~38 mph, ERA5 reanalysis gust). gridMET reports only daily *mean* wind, not gusts, so the new `sustained_wind_days` field counts days with daily-mean wind ≥ 8 m/s (~18 mph). These are fundamentally different signals — do not compare values across the two columns. If true gust/storm event counts are needed, NOAA's Storm Events Database is the appropriate next source.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.14.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
14
output/historical_climate_checkpoint.csv
Normal file
14
output/historical_climate_checkpoint.csv
Normal file
@@ -0,0 +1,14 @@
|
||||
master_id,source,name,operator,city,state,country,longitude,latitude,daymet_dataset_version,gridmet_dataset_version,climate_period_start,climate_period_end,daymet_tile,daymet_elevation_m,daymet_grid_x_m,daymet_grid_y_m,gridmet_grid_latitude,gridmet_grid_longitude,observation_days,observation_years,mean_annual_temperature_c,mean_summer_temperature_c,max_daily_temperature_c,min_daily_temperature_c,mean_diurnal_temperature_range_c,mean_relative_humidity_pct,mean_wet_bulb_temperature_c,max_wet_bulb_temperature_c,extreme_wet_bulb_days,cooling_degree_days_c,annual_cooling_degree_days_c_mean,extreme_heat_days,annual_extreme_heat_days_mean,precipitation_total_mm,annual_precipitation_mm_mean,annual_precipitation_cv,wet_day_precipitation_p95_mm,mean_wind_speed_ms,max_daily_mean_wind_speed_ms,sustained_wind_days,annual_sustained_wind_days_mean,wind_status,drought_severity_index_source_status,wildfire_smoke_exposure_source_status,daymet_variables,daymet_url
|
||||
curated/0334264478,merged,Terminal Commerce Building,,Philadelphia,PA,United States,-75.16083249176855,39.959809526359834,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11573,24.0,1997266.57,30406.34,39.95981,-75.160832,10950,30,13.0764200913242,24.108543478260867,39.26,-21.31,10.672155251141554,70.36852671711473,9.856640970016384,29.9621687216794,29,20429.369999999995,680.9789999999999,136,4.533333333333333,38118.32,1270.6106666666667,0.18090604364227492,31.89199999999997,4.084504471618909,12.5,319.0,10.633333333333333,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=39.959810&lon=-75.160832&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/0335468616,merged,TierPoint,,,WA,United States,-117.09593477622663,47.67191686714767,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,12272,630.0,-1219024.56,673810.47,47.671917,-117.095935,10950,30,9.016546575342465,19.093291666666666,40.09,-24.9,11.943931506849315,60.23195446572226,4.710395504299232,23.37884791052112,0,6053.609999999999,201.78699999999992,138,4.6,17468.8,582.2933333333333,0.1747350658066581,14.115999999999985,3.2801697390034685,11.5,70.0,2.3333333333333335,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=47.671917&lon=-117.095935&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/0336387583,merged,Baltimore Technology Park,,Baltimore,MD,United States,-76.62552663391301,39.27538391165092,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11572,9.0,1902773.82,-73433.06,39.275384,-76.625527,10950,30,14.10395799086758,24.99128079710145,39.98,-20.47,10.228728767123286,71.35921072562688,10.911169237092297,30.943426341757874,42,24231.6,807.72,223,7.433333333333334,34409.44,1146.9813333333332,0.20849552531726018,34.736,3.715641540427085,13.5,198.0,6.6,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=39.275384&lon=-76.625527&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/0337126739,merged,Intergate Seattle West Building C,Sabey,Tukwila,WA,United States,-122.29440690516351,47.494131187713045,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,12269,76.0,-1586884.55,742889.74,47.494131,-122.294407,10950,30,11.485044292237442,18.09163224637681,39.32,-9.97,8.688176255707763,71.0624487473783,8.27376698621232,23.00645940700761,0,3180.924999999999,106.03083333333329,10,0.3333333333333333,32506.78,1083.5593333333334,0.176673934909212,21.625499999999985,4.2742288738820955,15.5,893.0,29.766666666666666,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=47.494131&lon=-122.294407&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/0337126741,merged,Intergate Seattle West Building B,Sabey,Tukwila,WA,United States,-122.29368392327885,47.49307629033908,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,12269,76.0,-1586863.93,742767.84,47.494131,-122.294407,10950,30,11.485044292237442,18.09163224637681,39.32,-9.97,8.688176255707763,71.0624487473783,8.27376698621232,23.00645940700761,0,3180.924999999999,106.03083333333329,10,0.3333333333333333,32506.78,1083.5593333333334,0.176673934909212,21.625499999999985,4.2742288738820955,15.5,893.0,29.766666666666666,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=47.493076&lon=-122.293684&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/10014176940,merged,Microsoft,Microsoft,Guaynabo,PR,United States,-66.10898950000002,18.417435900000033,daymet_v4r1,,1991-01-01,2020-12-31,9777,16.0,3621410.85,-1879111.77,,,10950,30,25.81547488584475,27.144874999999992,34.91,15.07,9.11396894977169,75.78506213352443,22.51809641313038,27.63820127157118,0,82294.45,2743.148333333333,0,0.0,54395.869999999995,1813.195666666667,0.18200132344404926,28.888999999999996,,,,,unavailable_outside_conus,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=18.417436&lon=-66.108990&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/1219083552,merged,,,,IL,United States,-87.97218622171745,42.02314332706368,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11927,213.0,946754.31,17864.2,42.023143,-87.972186,10950,30,9.97390776255708,22.144297101449272,39.74,-30.46,10.23993789954338,70.50654029097984,6.957823466604632,29.99370463545738,7,13932.794999999998,464.4264999999999,49,1.6333333333333333,30357.16,1011.9053333333334,0.16092047671431148,29.023999999999976,4.649790107683884,13.5,541.0,18.033333333333335,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=42.023143&lon=-87.972186&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/1219083554,merged,T5 Chicago II,T5,Elk Grove Village,IL,United States,-87.97555687597257,42.0230423332657,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11927,213.0,946492.34,17815.36,42.023143,-87.972186,10950,30,9.97390776255708,22.144297101449272,39.74,-30.46,10.23993789954338,70.50654029097984,6.957823466604632,29.99370463545738,7,13932.794999999998,464.4264999999999,49,1.6333333333333333,30357.16,1011.9053333333334,0.16092047671431148,29.023999999999976,4.649790107683884,13.5,541.0,18.033333333333335,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=42.023042&lon=-87.975557&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/1221862807,merged,AWS IAD106,Amazon Web Services,,VA,United States,-77.51852812758844,38.74697497598997,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11572,63.0,1846819.9,-147489.22,38.746975,-77.518528,10950,30,13.476520547945201,24.138014492753623,39.12,-21.22,11.741119634703196,67.90200557268788,9.986608175884161,28.819002377816997,11,20837.155,694.5718333333332,158,5.266666666666667,35247.11,1174.9036666666666,0.18100162932466146,31.305999999999983,3.9398339112976823,15.8,251.0,8.366666666666667,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=38.746975&lon=-77.518528&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/1222007663,merged,Google,Google,,OH,United States,-82.76452657909626,40.05963281203005,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11749,331.0,1394405.54,-114024.63,40.059633,-82.764527,10950,30,10.846063470319633,21.955298913043478,36.59,-32.84,11.388786301369864,68.46078792715129,7.576812559382761,26.776242217926022,0,13165.859999999997,438.86199999999985,10,0.3333333333333333,35712.899999999994,1190.4299999999998,0.13646519991876516,25.45099999999998,4.499023544442417,13.4,511.0,17.033333333333335,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=40.059633&lon=-82.764527&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/9721703993,merged,TierPoint Milwaukee,,,WI,United States,-87.9610618,43.00937789999995,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11927,200.0,932598.66,121371.64,43.009378,-87.961062,10950,30,8.919163013698629,21.042980072463767,40.5,-32.12,9.755436529680365,71.38282568630439,6.0722708536298375,28.858298868205022,3,10552.029999999999,351.7343333333334,24,0.8,27385.6,912.8533333333336,0.15443990630439305,26.619999999999944,5.007145464500821,14.200000000000001,902.0,30.066666666666666,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=43.009378&lon=-87.961062&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
curated/9736262934,merged,Frontier Science,,Amherst,NY,United States,-78.8123836,42.990201099999986,daymet_v4r1,gridmet_vs,1991-01-01,2020-12-31,11931,180.0,1629871.78,260041.46,42.990201,-78.812384,10950,30,9.30234885844749,20.74546739130435,35.38,-24.55,9.541263926940639,72.05315950094813,6.4648431777879365,26.91148035798725,0,9479.474999999999,315.98249999999996,1,0.03333333333333333,30978.7,1032.6233333333334,0.13719318866566355,21.611499999999992,4.415814929731703,14.700000000000001,654.0,21.8,gridmet,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=42.990201&lon=-78.812384&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
osm/way/365941471,osm,,Claro,,PR,US,-66.0516873,18.446827,daymet_v4r1,,1991-01-01,2020-12-31,9777,11.0,3625853.61,-1873516.13,,,10950,30,25.889257534246575,27.205469202898552,35.01,15.38,8.787603652968036,76.53890427533007,22.69437372495814,27.817986972115694,0,83102.37,2770.0789999999993,1,0.03333333333333333,53693.689999999995,1789.7896666666666,0.17483046745643102,28.772000000000006,,,,,unavailable_outside_conus,needs_external_source,needs_external_source,"['tmin', 'tmax', 'prcp', 'vp']",https://daymet.ornl.gov/single-pixel/api/data?lat=18.446827&lon=-66.051687&vars=tmin%2Ctmax%2Cprcp%2Cvp&start=1991-01-01&end=2020-12-31&format=csv
|
||||
|
1210
rdh_precinct_vote_data_centers.ipynb
Normal file
1210
rdh_precinct_vote_data_centers.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user