From b442998eb5fb631d17c8be72a47244f899a0f7a8 Mon Sep 17 00:00:00 2001 From: dadams Date: Fri, 15 May 2026 20:53:42 -0700 Subject: [PATCH] update ingest_eia --- .../ingest_eia_energy_layers.cpython-314.pyc | Bin 35923 -> 39938 bytes ingest_eia_energy_layers.py | 97 +++++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/__pycache__/ingest_eia_energy_layers.cpython-314.pyc b/__pycache__/ingest_eia_energy_layers.cpython-314.pyc index abd9810d75faf02be9e6ad5a247a1139cedcf001..0756fadf4d8b5fa0858f78dc6aa1096b9387f6a2 100644 GIT binary patch delta 7589 zcmbtZ4Rljgo_{Z2ZIU+qN}7J8&1*|(18v$;z*<^Lfg%W$&IK!slo*oN5=fhH-wV*H zv?3x1Qt^%oq9VF8qmDDOW{#_KX2wyMWp-A2K%7I(>Jj%Gcik^fRn!$`j{E=Lm$bzh z@$A`${pEiA@Bi!m@BQCL{w=M|l8wk^{lz*q4fX;xO@$Sx}ixY5;4A7qE`h12&L3fQ@7>;5;%P@CMQZ zcq3^BTtHd?Tgk%BA_!_Di=fs{76UFJO94B`GQgY2a=;bjX26wX72s;p3Al!I0serH z&BG#DOV&Z<7SauPD_IY?f&38gHnI`0hin4$k=r+mOHh=bY=(M(1Oa8z3)n}t0EWm` zz-=T9*iQxkBP0sAolro9sDLrD18^tV1vp5C0Ph$kcf!wIWH;b283DYT>_JiF9beylN^BZ{k&X7eohWT?GV<&qFPOU zK^}nm;bh%IUL>PXAAT^|FpC_4>O*|+Y{H=YBVMi{4@3D8Ualpl$fHm@njGUL$DD#% zM;-&LC&!`ZaZcAjo`CX`ygY}Tfbvtk+(=FWLG=5>+3fy$40Wj^@{bl*UJ z3N6p_mL~EXwA@IZ2W;+HKz`P=UJ!z?tRUW-%Y>8{U?qta{Lk~Z@kNF|>C!x6La%HO z$6L}~){9%%=a#p`4E8(g!%*(dc*JxsxR35*pJgm5x*v)PuBt`!AhzyK!4|eAv&?ju zSJ`i~y5`*t#e{K(KOB=2CMv5jIzaD(HjhXTzz=6qt}!`;qd2gunHA#UxZ5^MtUihi z(gI=GKOhGcoXMFIr%$9Q;aFd&caT1V1KIYhRpMckS&uU59)x2E-zRR6EzAaS%xx=- z-;rG}rXPn<9us|r#dB)JJ?v6Wk$8|@&2bq|LB}9-<@N#f-rRo??`C~@o)#41G1C1g z%ZOk?NJB6HT+h-y?5(^_VCihTTPMcZ%KR0gg*}|V!TJ)8jwAe%eVsqQ5WSqp+#ZwZ zpid6@eg0^OjzQa=_;TmC*uyQxF$rtGf0vKSfd~yM^q0tht7qcsCG?>IRnrkQn^N?8 zIv!_t6|Msv-!5FMcfAV3I@cvkk=|ZKR_QBH_2^QHwwG)a$Jmo4?ipv0`5+-_sKpbG$hjtth98lm3!ik*#8de zDJ>Jn*>Gt^33p0PudD{PaHpiq{s;(<$6qVe=~}pBGH@VwgI=2@Zal{9_3R#Jf0nAj z?wMt0#Mw4v@iGDi2StuBDSlY)5Dk^88?TIuhN5lRA1in8SMVnD%!l*>U zA(g%cWltK-V0{%8PHsPPQ!s31O8lb-*~yB0<7H?WXTPraH3Z$+%HCGKq732CfP9Tj zJo3KJrU~Y%1e@5CGu!eMr>ddsa`gS4Pu^}`xTSXBS!uD;p-{*4ZGRX#JARfp55MIk3a7DsmY`*n)r(PS)w%$`RK!bmD=OG z8`g;8FdLiW%jWBl$0jZedWN+&x*AVoF@Xb&z2OK9c@G=j#|Cr9u7Dg>Ly-YknFg?r z8WGO1LyZOE2kgg))zr1`3@)P}8D05C_nShfR9Zt-k?YnibYS~js?wEkXO$y^bx5?_JIsr|mFr|hML zvq0$k3$GeBO$wd1diR00u$;LL*NV_o!l<7KUo!F7V^Kw>sulsh0L*hZ(nssj0ycl? z`?K#x<2j%Eknwdg-BGr%!v&h%*736Fo(&8<`e|Blci>Xul3D#tlBE%7j>4ZJvjs~_ z*{+-3(Xt(vhJi%|O6PqRChS7~9SWA%1>{5Q*A6>l3xMGbV)If-<>twvyzIiYn; zIS18B=xJo9V$gI;x+Pl+^gcP#@8he4HUZ_+@$)zD(J!6iY+1FUbER)x`{Gp{v<6L? zp3`%v3~L5AooznOPNA(p6GyrrJ4nmftDUb-GnsGg-{<8s?CCYIwBvtR(~vHH8UNF* zhl~zXZOU9aAFD0wiJt0$DMOKY!qlhPVrTF7)D`iRW8><9Z!{I@au|FjK6BG1-OM)R z^L_fvW6$4?`n-MnM>@#&uLSzUQ6>dTVHvdr=ZmfJ9l`qz!;>DPEy#2UKq4Ky9*D#s ziJ!q)F-;2=uB^2;6jgluw59OSO&D;V6g-h&{7TwX(WS@{VIDYN`6uWd5hk*QJm;gH zBcAic8~%Rl#rSY&t=`m(gbU*1VcEE1QlUiV+FMqyZYLX(v&i-Pi>b`p91DeE-;VY7 z`{C5WXOnIKep}-EXuilUEL|ku4U)B-AyV80uNSzZs48x8Bf?igo0$74!V7x-!?&6M z@Sgn89k6r^pV#QwYs=@cmsb|DPns>dZjo&-bG2s+x9DOzuMpEMvkJoY@=Zcb(d1a6 zQ@1ix5X?fU5YWCU0HdiZc}Pp@KzkBjbXDK zQWY`IO`6x0rLmrl8~LRfTG+{5j`}j8dy+aeD)s9YHlh#tM@-$u3}KFry}qriI_(<0 zgl@wou?C!w`l5!FZ*Z5e7f5-gp#nVThQFvTB(TWZQbUKh%FepHGjyvO@n?7)kSx%b z2qDpvKJ?qAG!m7RSX72%MNke(9V^-;ofVe_-sy&xI)sW5(jhHI~-cWxuLRC2K`Bfr(<-x;T5oGrG`2Y$3RLmvn{rQUC5~`#55b)2hGw!CS2>zBV(%SyL2N1ke^4GOiqaTd zN|O=G8AwU0p`}64Pn9LqjEyarF+5#9PJ+}bRVp6KR1Qj}Tz@#6u)rFis)9kLWu|oS zMMA&!2dns{Ibm6m3VW(0f;24=*kOyHHvl9u_$o@K(1a;Uhf!00C`n{!rfTlI_QHW; zG^>0X1HMRosU=5oi{<<9GAKaG$Pg^H(c%Neqa}w*j#iAPTSu)2tP{GN%-eC(OGleS@UeqMdyOIoeSQzyDnwtJ!(Aya!ihb37e2F zJ-GUEjyWfH!Y)V^C+<9U=keXo`v2&ze=y_U^4#Ng=c6l+tQ>x?p!h`EV`=B4Wdy4RivK;(=Y(_`1@i#dg`RT9QbWnVKM&UA@@%7gB?2bC~o3{4( z@N?Rb4sECNjnIF(*xFHJK3!++Xv;j^jP3uRvo6as2LwFTiPh{SrBlphqWUO{s&->G zv638iK&=(?*x#u;a__~}WvQ)&eOB&^&_Vh!Yl}6C7nu^f#dHxmX)ha#Z4i4|^^Q3` zLwU8e+hjRf%cG)}2aqz_^B!_?Y$Y z@QUZz<2xk#L2T&9C`LGs8UGD-ZpSgUa;GJ&WIU^QJaaMIxYOmrt3?3*CO=GokZTQ} zKbFRx^f==`+xdvTPw1L6NibPRrP#hQOtBm1r8>GzsT>(Kcbhi}O+SKHGf)#~@chp| zlG|;@jG3Zz3*Dx!8y@l&vHG?=qov!@bt67GXXQ;Juyic|0(;a-%=mBW9I>&LEgG`V zU2lc+Lxmvh1Z!!>G+Fj;Lxw;cn4?MloXLz18N7|z?sSXoFx;X4tMZ-K$X|jA!U4mW zky21!uyH9nztX9+Stfl#bt%-GeDK1^_hUh>uunONtba)ta!-l zV&ix9W$9Mt3j#4m)7#QRA{-F9yj3i=dya?%mQdDkEzdA7gEzD~@(m|&9`7>icY}mg*%pfW;D`rDI*N}Iw0;;7FVQpyp%39fo5up4M_sPZai+g5?x(<=)_TEvSFJ#*< zW>vy}=dvo_bJU$OTyV_Wzid3qaa24s^j@~(T*16ky{Fcn%U}3TcH4xB?cSU3%FnT1 z))!>jCguo`8C#CE9ACJ9)mQT%X<%G(aswTkSyUROsjQ=RMyP@(2X>nR-rSYvQ z7gWwVterOFS*g6!Y#h_2LCcug2rXkaJ(hEO_K`g}2Q7&-pD!2*_E^SFFSWE9+up>|Wav_rZMe@f`~)`sCqf3&yo)7*C483B{@(yp<*iR_{|+}(OP)+i}g^%AmCw4N0IB~yIUf?r9Tpk zh2@3xejtJW1f`nQ>~GY+FBHUm`_JlYzvvX<8vWT)UAgG~>}Ii76de;pFBmfR551!= QeAke^Z`r3%E#gf77i1~23IG5A delta 4911 zcmai14RBP|6@G93lHE-0M$+B#AUzU~4Zn*C) z5LB|^KT(Kq<=0rY!PZfos~cGN29lvImW^jWQT>^Qb{K*gaVgFWZI4S{I~Ub5di z=iHxj?z!ijd*9ZVbnhM3*(TVMEdo4EUnSf*ux_7iw3xB4q-P>AD(S?e*hzwtK@ycr zVpc}E46WtZ_4=oE3iC{~S6He=O|E3SvP_Aw$=iZX-ooY2J5rglXdZl@`u}clZOW@4q_uIB$cF*bYd^pRp>|t@R`7mB3Yat z4SY69D;LN!OT-FYgjTt~9BcZE(VAX$?|rLaOEd1Oq7Nb-pTa4abREF?vM z7wvyXAdPPFW?Z7N0%K(>? z6@YDICEy)o6<|AY0j?(Q4sjl;T0=a*cMvb2k4S)>#1FWZ1OV?O>i~nK3ot~&fZb$0 zASE)OLLz`0$VR|Tqz7aTS1o8mT2RJ>E{E{35?ofQpByu=QP|C>>zzT8{S{~&6CXXxa|0qB~ex^1PgWAZ5&@ zIdlM}_Y8xdg^~+QdpOHJNok!L1yVI`@B|~0YNC=7p&_~xWKNOph8LftP@Cjl;3>j- zmSZat_prXy?AQuhnpk=Wgld8}D0xDXPsW`q9?Qq5334#v4|Mj><9yuxDT`qo=$#w| zp7tReM!1>EP0W=FDr0@AO=99v7~wS0W3;;J;X&?KSeKn|EnBmZgFR(jDHHl{S>rpGMCvfx& z2r)J~t0o)0tR}CINVLZ-1w3v~cYqFotS{!z`cQO5u_umEtzDi?ZYp`h)F;yzin)0v zZl0rt!)xoXfNn!-2b)$jz6_VHCQzw6 z=RQTMQLa#HbAqsNU%m;LPl(oZ_=|RkMfxF;F zUcbN|DAp`nF)MD>B^Z3IRW>Sle40_6fuKY$L;D5ngT9LjpIMWmP28-`+9*g=g8~u9 zEqO*alB72>;s5BAA~*Z z+0wNuqClz%K9AyobWtserz0r2L!K^43wIus2CaPZ`tdIIH*|!ib2~p^-x$$Oa(IMt)%md3d zH@+x3%3uPg{u(Q|J20WRX0~L(SW6}JIRrmhVt35XXNMNNuN{BfY78;@6TpoE>KJ?Q zR*3l6A8tJ(uG)f~4fLu{)I?rm!%%4+(VFz$jM&AXWNf;sM@_zcS#$GTvNFCWT)KEn5g zw!wV<*pqaW$WCm@6a{u_ON%*miAW5@NKCA?w}8#=4Y9xU=8I|ULhpw3?O^_9%cMz= zDw01;d+3+!?r4?x0sB>Sndt*)q^p>6}3Nqv=e2uKUcYzV0cU$P5*_+o)bL&+6ZcIrZ{Au!M%l|g?-wS%|0JI z-CLx&-wd63%&L01M^+8eCb$OS?S@~c)Fc8i9q5u8DGgKYbbKBg`w+ecP|d-BthmFW zU=PK6Bi)LCRhl0n6i)@!Dz6K4y8|J=Br6n;9$L|hh$WJyBD5n6;;50R*9x2`7~}

FcGk}T8HsM^lX z?yD=sSk~@(!>HZ63|*c;h(=)8a~Q5^vAX?bqFBz>vb(BA3%2yZRsE|5UH7>Tdp}G{ z8C=xA=)BI5mT^IEO17Q133-l3>yFetSigJm#WcZ|dm)20KJagG0joSPrPl(2LDz2A zrHh4}`tf{@U``&i^jr2MU6KP}&zl+JTTH^6Gv;P4F4CP!$p_}tq{3#0@vW4(4q)Cc zZ%AmaGX8OD9x$hitj&4G)8&QDS;nE%1dt4689_3Xr$^dhZJul%Dl;QpWo^-$twZ&R z!2HRS-ZGk9IM6obOV9`bv=RL}LrrkIePOTLP2IrLXV6jnl2VN9W5}*$_Z*y4I}Z8R zkS|3F%c52w`6l7bnwFsKb)>F_9SyKc2c5kSq41vwR_ya3Qnw+67Y6I7W1QJ4b{oUFLb8PB0|x-u%8k`y)f*&Rf3G3x*ti Gulg^!r@pBG diff --git a/ingest_eia_energy_layers.py b/ingest_eia_energy_layers.py index 81ef532..c82a6f0 100644 --- a/ingest_eia_energy_layers.py +++ b/ingest_eia_energy_layers.py @@ -29,6 +29,7 @@ from typing import List, Optional, Dict, Any import psycopg2 import requests +from psycopg2 import sql DB_NAME = "data_centers" @@ -651,6 +652,72 @@ def build_summary_table(conn): cur.execute(f"analyze {SUMMARY_TABLE}") +def prune_stale_layer_versions(conn) -> int: + """Drop superseded EIA layer tables and remove stale catalog rows. + + Superseded versions are identified by a normalized source key + (source_url without trailing /data). The newest entry is kept. + """ + with conn.cursor() as cur: + cur.execute( + """ + with ranked as ( + select + c.table_name, + row_number() over ( + partition by coalesce( + nullif(regexp_replace(c.source_url, '/data/?$', ''), ''), + nullif(c.source_item_id, ''), + c.table_name + ) + order by c.imported_at desc, c.table_name desc + ) as rn + from public.energy_atlas_layers_catalog c + ) + select r.table_name + from ranked r + where r.rn > 1 + """ + ) + stale_tables = [row[0] for row in cur.fetchall()] + + pruned = 0 + with conn: + with conn.cursor() as cur: + for table_name in stale_tables: + # Guardrail: only manage script-owned EIA tables. + if not table_name.startswith("energy_eia_"): + continue + + cur.execute( + """ + select exists ( + select 1 + from information_schema.tables + where table_schema='public' and table_name=%s + ) + """, + (table_name,), + ) + table_exists = cur.fetchone()[0] + + if table_exists: + cur.execute( + sql.SQL("drop table if exists public.{} cascade").format( + sql.Identifier(table_name) + ) + ) + print(f"pruned stale table public.{table_name}") + + cur.execute( + "delete from public.energy_atlas_layers_catalog where table_name = %s", + (table_name,), + ) + pruned += 1 + + return pruned + + def parse_args(): """Parse command-line arguments.""" parser = argparse.ArgumentParser( @@ -680,6 +747,11 @@ def parse_args(): action="store_true", help="List selected datasets and exit.", ) + parser.add_argument( + "--keep-stale-tables", + action="store_true", + help="Do not prune superseded EIA tables/catalog entries.", + ) return parser.parse_args() @@ -730,13 +802,36 @@ def main(): except Exception as e: print(f" warning: import failed ({type(e).__name__}); skipping") continue + + if not args.keep_stale_tables: + pruned = prune_stale_layer_versions(conn) + if pruned > 0: + print(f"pruned stale layer versions: {pruned}") # Rebuild GEOID links from catalog. with conn.cursor() as cur: cur.execute( """ + with ranked as ( + select + c.table_name, + c.category, + row_number() over ( + partition by coalesce( + nullif(regexp_replace(c.source_url, '/data/?$', ''), ''), + nullif(c.source_item_id, ''), + c.table_name + ) + order by c.imported_at desc, c.table_name desc + ) as rn + from public.energy_atlas_layers_catalog c + join information_schema.tables t + on t.table_schema = 'public' + and t.table_name = c.table_name + ) select table_name, category - from public.energy_atlas_layers_catalog + from ranked + where rn = 1 order by table_name """ )