# File: AR-House/scripts/migrate_hud_source_urls.py
# Snapshot: 2026-07-03 12:24:58 -04:00 (99 lines, 3.0 KiB, Python)

"""Retroactive migration: regenerate source_url for existing HUD deals.
Background: B3 v1 bug saved generic URL `?citystate=FL` for ALL 39 HUD deals.
B3 v1.1 fix: derive source_url from case_number via build_deep_link().
This script:
1. Iterates all deals where source='hud_homestore'
2. For each: regenerates source_url from case_number
3. Updates the row
4. Reports how many URLs were regenerated, how many rows had no case_number
   (source_url set to NULL), how many were already correct, and how many
   were left unchanged.
"""
from __future__ import annotations
import io, sys
from pathlib import Path
# Re-wrap stdout as UTF-8 so the non-ASCII characters this script prints
# (e.g. "→") are written regardless of the console's default encoding;
# anything that still cannot be encoded is replaced instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# ROOT is the parent of the directory that contains this script. It is placed
# first on sys.path before the project imports below (deals_db, scrapers).
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from deals_db import init_db, _get_conn
from scrapers.hud_homestore import build_deep_link
def main() -> int:
    """Regenerate ``source_url`` for every HUD Homestore deal and print a report.

    Each row of ``deals`` with ``source = 'hud_homestore'`` has its URL rebuilt
    with ``build_deep_link(case_number)`` and is then handled as follows:

    * builder returned ``None`` and a URL is stored: ``source_url`` is set to NULL;
    * builder returned ``None`` and nothing is stored: the row is left alone;
    * the stored URL already equals the rebuilt one: the row is left alone;
    * otherwise: ``source_url`` is overwritten with the rebuilt URL.

    After the loop the changes are committed, a summary of the four counters
    is printed, and up to five rows are re-read and printed as a spot check.

    Returns:
        ``0`` always; the caller uses it as the process exit status.
    """
    init_db()
    conn = _get_conn()
    rows = conn.execute(
        "SELECT id, case_number, source_url, address FROM deals WHERE source = 'hud_homestore'"
    ).fetchall()
    print(f"HUD deals to migrate: {len(rows)}")
    print()

    fixed = 0            # source_url replaced by a regenerated deep link
    no_case = 0          # no deep link could be built; stored URL nulled
    already_ok = 0       # stored URL already matched the regenerated one
    unchanged_other = 0  # no deep link could be built and nothing was stored

    for r in rows:
        deal_id = r["id"]
        case_number = r["case_number"]
        old_url = r["source_url"]
        # Truncated address, used only to make the log lines recognisable.
        addr = (r["address"] or "?")[:50]

        new_url = build_deep_link(case_number)
        if new_url is None:
            # No case_number → cannot construct deep-link; nullify
            if old_url is None:
                unchanged_other += 1
            else:
                conn.execute("UPDATE deals SET source_url = NULL WHERE id = ?", (deal_id,))
                no_case += 1
                print(f" id={deal_id} case=None → set NULL (was {old_url[:60] if old_url else None})")
                print(f" addr: {addr}")
            continue

        if old_url == new_url:
            already_ok += 1
            continue

        conn.execute("UPDATE deals SET source_url = ? WHERE id = ?", (new_url, deal_id))
        fixed += 1
        # Only the first five rewrites are shown in detail to keep the log short.
        if fixed <= 5:
            print(f" id={deal_id} case={case_number}")
            print(f" old: {old_url}")
            print(f" new: {new_url}")
            print(f" addr: {addr}")

    # Persist the updates. Nothing above commits explicitly, so on a DB-API
    # connection that is not in autocommit mode the changes would be discarded
    # when the script exits; on an autocommit connection this is a no-op.
    conn.commit()

    print()
    print("=== Migration summary ===")
    print(f" Fixed (URL regenerated): {fixed}")
    print(f" No case_number (set NULL): {no_case}")
    print(f" Already correct: {already_ok}")
    print(f" Other unchanged: {unchanged_other}")
    print()
    print(f"Total HUD deals: {len(rows)}")

    # Verify
    # NOTE(review): the heading says "random", but the query is a plain
    # LIMIT 5 without ORDER BY, so it shows whichever rows come back first.
    print()
    print("=== Verification: 5 random URLs post-migration ===")
    rows2 = conn.execute(
        "SELECT id, case_number, source_url FROM deals WHERE source='hud_homestore' LIMIT 5"
    ).fetchall()
    for r in rows2:
        url = r["source_url"]
        case = r["case_number"]
        # True/False when a case number exists, None when there is none to check.
        case_in_url = case in (url or "") if case else None
        print(f" id={r['id']} case={case}")
        print(f" url={url}")
        print(f" case_in_url={case_in_url}")
    return 0
# Run the migration only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())