"""Re-backfill PA photos para Broward deals affected by Zillow photo misassignment bug. Logic: 1. Find Broward deals whose current photos_urls SHARES URL with other deals (= affected by bug) 2. Para esos, re-fetch foto from bcpa.net via pa_photo_lookup.fetch_pa_photo (gratis) 3. Update DB with correct PA photo URL Cero costos Firecrawl. Usa Playwright local contra bcpa.net SPA. """ from __future__ import annotations import argparse import json import sqlite3 import time import sys sys.path.insert(0, ".") def main(): p = argparse.ArgumentParser() p.add_argument("--dry-run", action="store_true") p.add_argument("--db", default="data/deals.db") p.add_argument("--limit", type=int, default=0, help="Cap for testing (0 = no limit)") args = p.parse_args() conn = sqlite3.connect(args.db) cur = conn.cursor() # Step 1: find Broward deals SIN foto (NULL/empty) OR con duplicate Zillow URL # parcel_id valid + county Broward + sin photo o photo bugged cur.execute(""" SELECT id, parcel_id, address, photos_urls FROM deals WHERE county = 'Broward' AND parcel_id IS NOT NULL AND parcel_id != '' AND parcel_id NOT LIKE 'Property Appraiser%' AND ( photos_urls IS NULL OR photos_urls = '' OR photos_urls = '[]' OR photos_urls IN ( SELECT photos_urls FROM deals WHERE photos_urls IS NOT NULL AND photos_urls != '[]' GROUP BY photos_urls HAVING COUNT(DISTINCT id) > 1 ) ) """) affected = cur.fetchall() if not affected: print("No Broward deals affected by bug — DB clean.") return print(f"Found {len(affected)} Broward deals affected by photo bug.") if args.limit: affected = affected[: args.limit] print(f"Limiting to first {args.limit} for testing.") if args.dry_run: for deal_id, parcel, addr, old_photos in affected[:10]: print(f" Would re-fetch: id={deal_id} parcel={parcel} addr={(addr or '?')[:50]}") print(f"\nDRY RUN — no changes. Re-run sin --dry-run to execute.") return # Step 2: re-fetch via PA from data_fetchers.pa_photo_lookup import fetch_pa_photo updated = 0 failed = 0 t0 = time.time() for deal_id, parcel, addr, old_photos in affected: addr_safe = (addr or "?")[:50].encode("ascii", "replace").decode("ascii") print(f" Fetching id={deal_id} parcel={parcel} addr={addr_safe}...", flush=True) try: photo_url, meta = fetch_pa_photo(county="Broward", parcel_id=parcel, timeout_seconds=25) if photo_url: new_photos = json.dumps([photo_url]) cur.execute("UPDATE deals SET photos_urls = ? WHERE id = ?", (new_photos, deal_id)) conn.commit() updated += 1 print(f" PA photo: {photo_url[-50:]}") else: # Set to NULL so card shows no photo (instead of stale Zillow) cur.execute("UPDATE deals SET photos_urls = NULL WHERE id = ?", (deal_id,)) conn.commit() failed += 1 err = (meta.get("error") or "no photo found")[:80] print(f" NO PA photo found ({err}). photos_urls NULLed.") except Exception as e: failed += 1 err = str(e)[:80].encode("ascii", "replace").decode("ascii") print(f" EXCEPTION: {err}") time.sleep(2) # don't hammer if errors elapsed = time.time() - t0 print() print(f"Done in {elapsed:.0f}s. Updated {updated}, failed/no-photo {failed}.") conn.close() if __name__ == "__main__": main()