feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
"""Retroactive migration: regenerate source_url for existing HUD deals.
|
||||
|
||||
Background: B3 v1 bug saved generic URL `?citystate=FL` for ALL 39 HUD deals.
|
||||
B3 v1.1 fix: derive source_url from case_number via build_deep_link().
|
||||
|
||||
This script:
|
||||
1. Iterates all deals where source='hud_homestore'
|
||||
2. For each: regenerates source_url from case_number
|
||||
3. Updates the row
|
||||
4. Reports: how many were fixed, how many had no case_number (source_url set to NULL), and how many were already correct
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import io, sys
|
||||
from pathlib import Path
|
||||
|
||||
# Re-wrap stdout as UTF-8 (unencodable characters are replaced) so the
# non-ASCII "→" printed by this script cannot raise an encoding error.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
# Absolute path of the directory one level above the directory containing this file.
ROOT = Path(__file__).resolve().parent.parent
|
||||
# Put ROOT first on the import path; presumably this is what lets the
# `deals_db` and `scrapers` imports below resolve when run as a script.
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from deals_db import init_db, _get_conn
|
||||
from scrapers.hud_homestore import build_deep_link
|
||||
|
||||
|
||||
def main() -> int:
    """Regenerate ``source_url`` for every HUD Homestore deal and print a report.

    For each row with ``source = 'hud_homestore'`` the URL is rebuilt from
    ``case_number`` via ``build_deep_link()``:

    * ``build_deep_link()`` returns ``None``: a non-NULL ``source_url`` is set
      to NULL; a ``source_url`` that is already NULL is left untouched.
    * The stored URL already equals the rebuilt one: nothing is written.
    * Otherwise the row is updated with the rebuilt URL (only the first five
      such rows are printed in detail).

    The updates are committed once, after the loop and before the summary is
    printed. A small sample of rows is then re-read and printed for a visual
    check.

    Returns:
        Process exit code; 0 when the migration runs to completion.
    """
    init_db()
    conn = _get_conn()

    rows = conn.execute(
        "SELECT id, case_number, source_url, address FROM deals WHERE source = 'hud_homestore'"
    ).fetchall()

    print(f"HUD deals to migrate: {len(rows)}")
    print()

    fixed = 0
    no_case = 0
    already_ok = 0
    unchanged_other = 0

    for r in rows:
        deal_id = r["id"]
        case_number = r["case_number"]
        old_url = r["source_url"]
        addr = (r["address"] or "?")[:50]

        new_url = build_deep_link(case_number)

        if new_url is None:
            # No deep link can be built for this row; a stale URL is worse
            # than none, so clear it (unless it is already NULL).
            if old_url is None:
                unchanged_other += 1
            else:
                conn.execute("UPDATE deals SET source_url = NULL WHERE id = ?", (deal_id,))
                no_case += 1
                print(f" id={deal_id} case=None → set NULL (was {old_url[:60] if old_url else None})")
                print(f" addr: {addr}")
            continue

        if old_url == new_url:
            already_ok += 1
            continue

        conn.execute("UPDATE deals SET source_url = ? WHERE id = ?", (new_url, deal_id))
        fixed += 1
        # Keep the log readable: show details for the first five fixes only.
        if fixed <= 5:
            print(f" id={deal_id} case={case_number}")
            print(f" old: {old_url}")
            print(f" new: {new_url}")
            print(f" addr: {addr}")

    # Persist the updates. Without an explicit commit, a connection that is
    # not in autocommit mode keeps them in an open transaction: the
    # verification query below (same connection) would still see the new
    # values, but they would be discarded when the process exits.
    conn.commit()

    print()
    print("=== Migration summary ===")
    print(f" Fixed (URL regenerated): {fixed}")
    print(f" No case_number (set NULL): {no_case}")
    print(f" Already correct: {already_ok}")
    print(f" Other unchanged: {unchanged_other}")
    print()
    print(f"Total HUD deals: {len(rows)}")

    # Verify.
    # NOTE(review): despite the heading, this is LIMIT 5 with no ORDER BY, so
    # it shows whichever five rows the database returns first, not a random
    # sample.
    print()
    print("=== Verification: 5 random URLs post-migration ===")
    sample = conn.execute(
        "SELECT id, case_number, source_url FROM deals WHERE source='hud_homestore' LIMIT 5"
    ).fetchall()
    for r in sample:
        url = r["source_url"]
        case = r["case_number"]
        # True/False when a case number exists; None when there is nothing to check.
        case_in_url = case in (url or "") if case else None
        print(f" id={r['id']} case={case}")
        print(f" url={url}")
        print(f" case_in_url={case_in_url}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: run the migration and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user