Files
AR-House/scripts/test_hud_deeplink_fix.py
T
2026-07-03 12:24:58 -04:00

148 lines
5.1 KiB
Python

"""Validate B3 source_url bugfix.
1. Unit test: build_deep_link() with sample case numbers.
2. Scrape fresh HUD listings → verify each deal_record has unique deep-link.
3. HTTP-verify (via Playwright) that 3 random deep-links actually open the property.
"""
from __future__ import annotations
import io, sys, time
from pathlib import Path
# Re-wrap stdout as UTF-8 and replace unencodable characters instead of raising.
# NOTE(review): presumably needed because this script prints emoji/arrows that a
# non-UTF-8 console (e.g. Windows cp1252) cannot encode — confirm.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# ROOT is the parent of the directory that contains this script. It is put at the
# front of sys.path so that the `scrapers` package imported below resolves when
# the script is run directly.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from scrapers.hud_homestore import build_deep_link, scrape_hud_homestore
def test_unit():
    """Check build_deep_link() against a fixed table of inputs.

    Each table row is ``(case_number_input, expected_url_or_None)``. A result
    line is printed for every row, followed by the expected value when the
    row fails.

    Returns:
        The number of rows whose actual result differed from the expectation.
    """
    rule = "=" * 60
    print(rule)
    print("UNIT TESTS — build_deep_link()")
    print(rule)

    expectations = [
        ("093-676572", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-676572"),
        ("093-612260", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-612260"),
        (None, None),
        ("", None),
        (" 093-727486 ", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-727486"),
    ]

    mismatches = 0
    for raw_case, wanted in expectations:
        got = build_deep_link(raw_case)
        matched = got == wanted
        mark = "✅" if matched else "❌"
        print(f" {mark} build_deep_link({raw_case!r}) → {got}")
        if matched:
            continue
        # Only failing rows get counted and have their expectation echoed.
        mismatches += 1
        print(f" expected: {wanted}")
    return mismatches
def test_scrape_unique_urls():
    """Scrape HUD listings for FL and report whether every deal has its own deep-link.

    Prints the number of deals, the number of distinct ``source_url`` values,
    an explicit verdict listing missing and duplicated URLs, and a sample of
    the first five deals showing whether each URL contains the deal's case
    number.

    Returns:
        The deal records returned by ``scrape_hud_homestore``. This function
        only reports; it does not return a failure count, so the caller
        decides how the result affects pass/fail.
    """
    from collections import Counter

    print()
    print("=" * 60)
    print("SCRAPE TEST — fresh scrape, verify unique deep-links")
    print("=" * 60)
    deals = scrape_hud_homestore(
        states=["FL"],
        status_cb=lambda m: print(f" {m}"),
        use_cache=True,
    )
    print(f"\n Total: {len(deals)} deals scraped")

    # Verify uniqueness. Deals without a URL are counted separately: putting
    # them in the set would collapse any number of missing URLs into a single
    # ``None`` entry and distort the unique count.
    urls = [d.get("source_url") for d in deals]
    present = [u for u in urls if u]
    unique = set(present)
    print(f" Unique source_urls: {len(unique)} (should be == {len(deals)})")

    missing = len(urls) - len(present)
    repeated = {u: n for u, n in Counter(present).items() if n > 1}
    if missing:
        print(f" ❌ {missing} deal(s) have no source_url")
    for dup_url, count in repeated.items():
        print(f" ❌ {count} deals share source_url {dup_url}")
    if deals and not missing and not repeated:
        print(" ✅ Every deal has its own source_url")

    # Sample 5
    print()
    print(" Sample 5 deal source_urls:")
    for d in deals[:5]:
        url = d.get("source_url")
        case = d.get("case_number")
        addr = (d.get("address") or "?")[:50]
        # A correct deep-link embeds the deal's own case number.
        case_in_url = case in (url or "") if case else False
        print(f" case={case} url={url}")
        print(f" addr={addr} | case_in_url={case_in_url}")
    return deals
def test_http_verify(deals, n_samples=3):
"""HTTP-load 3 random deep-links via Playwright; verify page renders the property."""
print()
print("=" * 60)
print(f"HTTP VERIFY — {n_samples} random deep-links open correct property")
print("=" * 60)
from playwright.sync_api import sync_playwright
REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
# Pick first, middle, last
samples = [deals[0], deals[len(deals)//2], deals[-1]] if len(deals) >= 3 else deals
failures = 0
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_context(user_agent=REAL_UA, viewport={"width": 1400, "height": 900}).new_page()
page.set_default_timeout(20_000)
# Set session
page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
time.sleep(1.5)
for d in samples:
url = d.get("source_url")
case = d.get("case_number")
addr = d.get("address") or ""
print(f"\n Testing case={case}")
print(f" URL: {url}")
print(f" Expected address: {addr[:60]}")
try:
page.goto(url, wait_until="networkidle", timeout=20_000)
time.sleep(3)
body = page.locator("body").inner_text()
# Verify body contains the case number AND part of the expected address
# Use the street number as a strong signal
import re
street_num_match = re.match(r"^(\d+)", addr)
street_num = street_num_match.group(1) if street_num_match else None
has_case = case in body
has_street_num = (street_num in body) if street_num else False
has_price = (str(int(d.get("listing_price") or 0)) in body.replace(",", ""))
print(f" has_case#={has_case}, has_street_num={has_street_num}, has_price={has_price}")
if has_case and (has_street_num or has_price):
print(f" ✅ Deep-link renders the correct property")
else:
print(f" ❌ Deep-link does NOT render the expected property")
failures += 1
except Exception as e:
print(f" ❌ ERROR: {e}")
failures += 1
browser.close()
return failures
def main():
    """Run every check and return the total number of failures (0 means success).

    Runs the build_deep_link() unit table, the scrape check, and the
    Playwright HTTP check on the scraped deals, then prints a summary.
    The scrape step adds one failure when the ``source_url`` values are not
    all present and distinct, and one when no deals were scraped at all.

    Returns:
        The accumulated failure count, used as the process exit status.
    """
    rc = 0
    rc += test_unit()
    deals = test_scrape_unique_urls()
    if deals:
        # test_scrape_unique_urls() only prints its findings and returns the
        # deals, so the uniqueness verdict is counted here; otherwise the
        # duplicate-URL regression this script guards against could never
        # fail the run.
        urls = [d.get("source_url") for d in deals]
        distinct = {u for u in urls if u}
        if len(distinct) != len(urls):
            print(
                f"\n ❌ source_url values are not unique: {len(urls)} deals, "
                f"{len(distinct)} distinct non-empty URLs"
            )
            rc += 1
        rc += test_http_verify(deals, n_samples=3)
    else:
        # With nothing scraped, neither uniqueness nor the HTTP check ran;
        # reporting success would be misleading.
        print("\n ❌ No deals scraped — uniqueness and HTTP checks could not run")
        rc += 1
    print()
    print("=" * 60)
    if rc == 0:
        print("✅ ALL TESTS PASSED")
    else:
        print(f"❌ {rc} failures")
    return rc


if __name__ == "__main__":
    # The failure count becomes the process exit status.
    sys.exit(main())