feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
"""Backfill clerk deal photos via County Property Appraiser sites (GRATIS).
|
||||
|
||||
Alternativa a backfill_zillow_photos.py — usa Playwright sobre PA sites,
|
||||
cero costo Firecrawl.
|
||||
|
||||
Coverage actual: solo Broward (~70 deals). Phase 3.5.B: agregar Duval, etc.
|
||||
|
||||
Solo procesa deals que NO tienen foto AUN. Idempotent.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse, io, json, sys, time
|
||||
from pathlib import Path
|
||||
|
||||
# Force UTF-8 output so the progress glyphs printed by main() (✓ / ✗ / →) do
# not fail on consoles whose default encoding cannot represent them;
# unencodable characters are replaced instead of raising.
# reconfigure() switches the encoding in place and keeps the stream's current
# line-buffering setting, so progress lines still show up immediately on a
# terminal (re-wrapping sys.stdout.buffer in a new TextIOWrapper would not).
# The hasattr guard skips streams that are not TextIOWrapper-like.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# Parent of the directory containing this script. It is placed first on
# sys.path so the project imports that follow resolve when run as a script.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from deals_db import init_db, _get_conn
|
||||
from data_fetchers.pa_photo_lookup import _fetch_broward_batch
|
||||
|
||||
|
||||
# County name (as given to --county) -> value matched against the `source`
# column of the `deals` table when selecting rows to backfill.
COUNTY_TO_SOURCE: dict[str, str] = {
    "Broward": "broward_clerk",
    # Planned for Phase 3.5.B (not enabled yet):
    #   "Duval": "duval_clerk"
    #   "Hillsborough": "hillsborough_clerk"
}
|
||||
|
||||
|
||||
def main():
    """Backfill `photos_urls` for clerk deals of one county that have no photo.

    Command-line options:
        --county   County to process; must be a key of COUNTY_TO_SOURCE
                   (default "Broward").
        --limit    Optional maximum number of deals to process.
        --dry-run  Fetch and report, but do not write to the database.

    Returns:
        Process exit code: 1 if the county is not supported, otherwise 0
        (including when there is nothing to backfill).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--county", default="Broward", help="County to backfill (default Broward)")
    ap.add_argument("--limit", type=int, default=None)
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    init_db()
    conn = _get_conn()

    source = COUNTY_TO_SOURCE.get(args.county)
    if not source:
        print(f"ERROR: county '{args.county}' not yet supported. Available: {list(COUNTY_TO_SOURCE.keys())}")
        return 1

    # Find clerk deals WITHOUT photo. Rows previously marked '[]' (an earlier
    # miss) are selected again, so they are retried on every run.
    q = (
        "SELECT id, parcel_id, address FROM deals "
        "WHERE source = ? "
        "AND parcel_id IS NOT NULL AND parcel_id != '' "
        "AND (photos_urls IS NULL OR photos_urls = '' OR photos_urls = '[]') "
        "ORDER BY id"
    )
    params = [source]
    if args.limit:
        # Bind the limit as a parameter instead of formatting it into the SQL.
        q += " LIMIT ?"
        params.append(args.limit)
    rows = conn.execute(q, tuple(params)).fetchall()

    print(f"Found {len(rows)} {args.county} deals sin foto")
    if not rows:
        return 0

    # Several deals can share a parcel; look each parcel up only once
    # (dict.fromkeys keeps first-seen order).
    parcel_ids = list(dict.fromkeys(r["parcel_id"] for r in rows))
    print(f"Starting batch fetch via {args.county} PA (Playwright, gratis)...")
    print(f"Estimated time: ~{len(parcel_ids) * 12}s ({len(parcel_ids) * 12 // 60}m)")
    print()

    # NOTE(review): the fetcher is hard-wired to Broward. When more counties
    # are added to COUNTY_TO_SOURCE, this call must be selected per county.
    t0 = time.perf_counter()
    results = _fetch_broward_batch(parcel_ids, timeout_seconds=20)
    elapsed = time.perf_counter() - t0

    hits = 0
    misses = 0
    for r in rows:
        photo = results.get(r["parcel_id"])
        if photo:
            hits += 1
            if not args.dry_run:
                conn.execute(
                    "UPDATE deals SET photos_urls = ? WHERE id = ?",
                    (json.dumps([photo]), r["id"]),
                )
            print(f" ✓ id={r['id']} parcel={r['parcel_id']} → {photo[-60:]}")
        else:
            misses += 1
            if not args.dry_run:
                # Normalise NULL / '' to an empty JSON list.
                conn.execute(
                    "UPDATE deals SET photos_urls = ? WHERE id = ?",
                    ("[]", r["id"]),
                )
            print(f" ✗ id={r['id']} parcel={r['parcel_id']} no photo found")

    if not args.dry_run:
        # Persist the updates. NOTE(review): assumes _get_conn() returns a
        # sqlite3-style connection, where uncommitted writes are discarded
        # when the connection is closed; commit() is harmless if the
        # connection already autocommits.
        conn.commit()

    print()
    print("=" * 50)
    print(f"DONE in {elapsed:.0f}s ({elapsed/60:.1f} min)")
    print(f" Hits: {hits}/{len(rows)}")
    print(f" Misses: {misses}/{len(rows)}")
    # rows is non-empty here (early return above), so no division by zero.
    print(f" Hit rate: {hits*100//len(rows)}%")
    print(" Cost: $0 (Playwright gratis)")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user