"""Backfill clerk deal photos via County Property Appraiser sites (GRATIS). Alternativa a backfill_zillow_photos.py — usa Playwright sobre PA sites, cero costo Firecrawl. Coverage actual: solo Broward (~70 deals). Phase 3.5.B: agregar Duval, etc. Solo procesa deals que NO tienen foto AUN. Idempotent. """ from __future__ import annotations import argparse, io, json, sys, time from pathlib import Path sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) from deals_db import init_db, _get_conn from data_fetchers.pa_photo_lookup import _fetch_broward_batch COUNTY_TO_SOURCE = { "Broward": "broward_clerk", # "Duval": "duval_clerk", # Phase 3.5.B # "Hillsborough": "hillsborough_clerk", # Phase 3.5.B } def main(): ap = argparse.ArgumentParser() ap.add_argument("--county", default="Broward", help="County to backfill (default Broward)") ap.add_argument("--limit", type=int, default=None) ap.add_argument("--dry-run", action="store_true") args = ap.parse_args() init_db() conn = _get_conn() source = COUNTY_TO_SOURCE.get(args.county) if not source: print(f"ERROR: county '{args.county}' not yet supported. Available: {list(COUNTY_TO_SOURCE.keys())}") return 1 # Find clerk deals WITHOUT photo q = ( "SELECT id, parcel_id, address FROM deals " "WHERE source = ? " "AND parcel_id IS NOT NULL AND parcel_id != '' " "AND (photos_urls IS NULL OR photos_urls = '' OR photos_urls = '[]') " "ORDER BY id" ) if args.limit: q += f" LIMIT {args.limit}" rows = conn.execute(q, (source,)).fetchall() print(f"Found {len(rows)} {args.county} deals sin foto") if not rows: return 0 parcel_ids = [r["parcel_id"] for r in rows] print(f"Starting batch fetch via {args.county} PA (Playwright, gratis)...") print(f"Estimated time: ~{len(parcel_ids) * 12}s ({len(parcel_ids) * 12 // 60}m)") print() t0 = time.perf_counter() results = _fetch_broward_batch(parcel_ids, timeout_seconds=20) elapsed = time.perf_counter() - t0 hits = 0 misses = 0 for r in rows: photo = results.get(r["parcel_id"]) if photo: hits += 1 if not args.dry_run: conn.execute( "UPDATE deals SET photos_urls = ? WHERE id = ?", (json.dumps([photo]), r["id"]), ) print(f" ✓ id={r['id']} parcel={r['parcel_id']} → {photo[-60:]}") else: misses += 1 if not args.dry_run: conn.execute( "UPDATE deals SET photos_urls = ? WHERE id = ?", ("[]", r["id"]), ) print(f" ✗ id={r['id']} parcel={r['parcel_id']} no photo found") print() print("=" * 50) print(f"DONE in {elapsed:.0f}s ({elapsed/60:.1f} min)") print(f" Hits: {hits}/{len(rows)}") print(f" Misses: {misses}/{len(rows)}") print(f" Hit rate: {hits*100//len(rows)}%") print(f" Cost: $0 (Playwright gratis)") return 0 if __name__ == "__main__": sys.exit(main())