"""Retroactive migration: regenerate source_url for existing HUD deals. Background: B3 v1 bug saved generic URL `?citystate=FL` for ALL 39 HUD deals. B3 v1.1 fix: derive source_url from case_number via build_deep_link(). This script: 1. Iterates all deals where source='hud_homestore' 2. For each: regenerates source_url from case_number 3. Updates the row 4. Reports: how many fixed, how many had no case_number (would mark as NULL) """ from __future__ import annotations import io, sys from pathlib import Path sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) from deals_db import init_db, _get_conn from scrapers.hud_homestore import build_deep_link def main() -> int: init_db() conn = _get_conn() rows = conn.execute( "SELECT id, case_number, source_url, address FROM deals WHERE source = 'hud_homestore'" ).fetchall() print(f"HUD deals to migrate: {len(rows)}") print() fixed = 0 no_case = 0 already_ok = 0 unchanged_other = 0 for r in rows: deal_id = r["id"] case_number = r["case_number"] old_url = r["source_url"] addr = (r["address"] or "?")[:50] new_url = build_deep_link(case_number) if new_url is None: # No case_number → cannot construct deep-link; nullify if old_url is None: unchanged_other += 1 else: conn.execute("UPDATE deals SET source_url = NULL WHERE id = ?", (deal_id,)) no_case += 1 print(f" id={deal_id} case=None → set NULL (was {old_url[:60] if old_url else None})") print(f" addr: {addr}") continue if old_url == new_url: already_ok += 1 continue conn.execute("UPDATE deals SET source_url = ? WHERE id = ?", (new_url, deal_id)) fixed += 1 if fixed <= 5: print(f" id={deal_id} case={case_number}") print(f" old: {old_url}") print(f" new: {new_url}") print(f" addr: {addr}") print() print(f"=== Migration summary ===") print(f" Fixed (URL regenerated): {fixed}") print(f" No case_number (set NULL): {no_case}") print(f" Already correct: {already_ok}") print(f" Other unchanged: {unchanged_other}") print() print(f"Total HUD deals: {len(rows)}") # Verify print() print("=== Verification: 5 random URLs post-migration ===") rows2 = conn.execute( "SELECT id, case_number, source_url FROM deals WHERE source='hud_homestore' LIMIT 5" ).fetchall() for r in rows2: url = r["source_url"] case = r["case_number"] case_in_url = case in (url or "") if case else None print(f" id={r['id']} case={case}") print(f" url={url}") print(f" case_in_url={case_in_url}") return 0 if __name__ == "__main__": sys.exit(main())