99 lines
3.0 KiB
Python
99 lines
3.0 KiB
Python
"""Retroactive migration: regenerate source_url for existing HUD deals.
|
|
|
|
Background: B3 v1 bug saved generic URL `?citystate=FL` for ALL 39 HUD deals.
|
|
B3 v1.1 fix: derive source_url from case_number via build_deep_link().
|
|
|
|
This script:
|
|
1. Iterates all deals where source='hud_homestore'
|
|
2. For each: regenerates source_url from case_number
|
|
3. Updates the row
|
|
4. Reports: how many were fixed and how many had no case_number (source_url set to NULL)
|
|
"""
|
|
from __future__ import annotations
|
|
import io, sys
|
|
from pathlib import Path
|
|
|
|
# Force UTF-8 output so the non-ASCII characters printed by this script
# (e.g. the arrow in the "set NULL" message) cannot raise UnicodeEncodeError
# on consoles with a legacy code page; unencodable characters are replaced.
# reconfigure() changes the existing stream in place instead of stacking a
# second TextIOWrapper on the same buffer, so the stream's line-buffering
# setting is kept. The guard skips streams that do not offer reconfigure()
# (for example a replaced, non-TextIOWrapper stdout).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# Directory one level above the directory containing this script.
ROOT = Path(__file__).resolve().parent.parent

# Put ROOT first on the import path so the project-local imports that follow
# resolve when the script is executed directly.
sys.path.insert(0, str(ROOT))
|
|
|
|
from deals_db import init_db, _get_conn
|
|
from scrapers.hud_homestore import build_deep_link
|
|
|
|
|
|
def main() -> int:
    """Regenerate ``source_url`` for every deal whose source is 'hud_homestore'.

    For each matching row the URL is rebuilt from ``case_number`` with
    ``build_deep_link()``:

    * If no deep link can be built and a URL is stored, the URL is set to NULL.
    * If no deep link can be built and the URL is already NULL, the row is
      left alone.
    * If the stored URL already equals the rebuilt one, the row is left alone.
    * Otherwise the stored URL is overwritten with the rebuilt one.

    All updates are committed together after the loop. A summary and a short
    verification listing are printed to stdout.

    Returns:
        0, used as the process exit code.
    """
    init_db()
    conn = _get_conn()

    rows = conn.execute(
        "SELECT id, case_number, source_url, address FROM deals WHERE source = 'hud_homestore'"
    ).fetchall()

    print(f"HUD deals to migrate: {len(rows)}")
    print()

    fixed = 0
    no_case = 0
    already_ok = 0
    unchanged_other = 0

    for r in rows:
        deal_id = r["id"]
        case_number = r["case_number"]
        old_url = r["source_url"]
        # Truncated address, used only to make the log lines recognisable.
        addr = (r["address"] or "?")[:50]

        new_url = build_deep_link(case_number)

        if new_url is None:
            # No deep link could be built; a stored URL is cleared instead of
            # being left in place.
            if old_url is None:
                unchanged_other += 1
            else:
                conn.execute("UPDATE deals SET source_url = NULL WHERE id = ?", (deal_id,))
                no_case += 1
                print(f" id={deal_id} case=None → set NULL (was {old_url[:60] if old_url else None})")
                print(f" addr: {addr}")
            continue

        if old_url == new_url:
            already_ok += 1
            continue

        conn.execute("UPDATE deals SET source_url = ? WHERE id = ?", (new_url, deal_id))
        fixed += 1
        # Only the first five rewrites are printed in detail to keep the log short.
        if fixed <= 5:
            print(f" id={deal_id} case={case_number}")
            print(f" old: {old_url}")
            print(f" new: {new_url}")
            print(f" addr: {addr}")

    # Persist the updates. Without an explicit commit a DB-API connection in
    # its default (non-autocommit) mode discards them when the process exits,
    # even though the verification query below, which runs on this same
    # connection, would still show the new values. On an autocommit connection
    # this call has no effect.
    conn.commit()

    print()
    print("=== Migration summary ===")
    print(f" Fixed (URL regenerated): {fixed}")
    print(f" No case_number (set NULL): {no_case}")
    print(f" Already correct: {already_ok}")
    print(f" Other unchanged: {unchanged_other}")
    print()
    print(f"Total HUD deals: {len(rows)}")

    # Verify
    # NOTE: the query has no ORDER BY, so these are simply the first five rows
    # the database returns rather than a random sample.
    print()
    print("=== Verification: 5 random URLs post-migration ===")
    rows2 = conn.execute(
        "SELECT id, case_number, source_url FROM deals WHERE source='hud_homestore' LIMIT 5"
    ).fetchall()
    for r in rows2:
        url = r["source_url"]
        case = r["case_number"]
        # None when the row has no case number; otherwise whether the case
        # number appears in the stored URL.
        case_in_url = case in (url or "") if case else None
        print(f" id={r['id']} case={case}")
        print(f" url={url}")
        print(f" case_in_url={case_in_url}")

    return 0
|
|
|
|
|
|
# Script entry point: run the migration and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|