"""Validate B3 source_url bugfix. 1. Unit test: build_deep_link() with sample case numbers. 2. Scrape fresh HUD listings → verify each deal_record has unique deep-link. 3. HTTP-verify (via Playwright) that 3 random deep-links actually open the property. """ from __future__ import annotations import io, sys, time from pathlib import Path sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) from scrapers.hud_homestore import build_deep_link, scrape_hud_homestore def test_unit(): """Unit tests for build_deep_link().""" print("=" * 60) print("UNIT TESTS — build_deep_link()") print("=" * 60) cases = [ ("093-676572", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-676572"), ("093-612260", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-612260"), (None, None), ("", None), (" 093-727486 ", "https://www.hudhomestore.gov/PropertyDetails?caseNumber=093-727486"), ] failures = 0 for inp, expected in cases: actual = build_deep_link(inp) ok = "✅" if actual == expected else "❌" print(f" {ok} build_deep_link({inp!r}) → {actual}") if actual != expected: failures += 1 print(f" expected: {expected}") return failures def test_scrape_unique_urls(): """Scrape fresh + verify each deal has a unique deep-link.""" print() print("=" * 60) print("SCRAPE TEST — fresh scrape, verify unique deep-links") print("=" * 60) deals = scrape_hud_homestore( states=["FL"], status_cb=lambda m: print(f" {m}"), use_cache=True, ) print(f"\n Total: {len(deals)} deals scraped") # Verify uniqueness urls = [d.get("source_url") for d in deals] unique = set(urls) print(f" Unique source_urls: {len(unique)} (should be == {len(deals)})") # Sample 5 print() print(" Sample 5 deal source_urls:") for d in deals[:5]: url = d.get("source_url") case = d.get("case_number") addr = (d.get("address") or "?")[:50] # Verify URL contains case_number case_in_url = case in (url or "") if case else False print(f" case={case} url={url}") print(f" addr={addr} | case_in_url={case_in_url}") return deals def test_http_verify(deals, n_samples=3): """HTTP-load 3 random deep-links via Playwright; verify page renders the property.""" print() print("=" * 60) print(f"HTTP VERIFY — {n_samples} random deep-links open correct property") print("=" * 60) from playwright.sync_api import sync_playwright REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") # Pick first, middle, last samples = [deals[0], deals[len(deals)//2], deals[-1]] if len(deals) >= 3 else deals failures = 0 with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_context(user_agent=REAL_UA, viewport={"width": 1400, "height": 900}).new_page() page.set_default_timeout(20_000) # Set session page.goto("https://www.hudhomestore.gov/", wait_until="networkidle") time.sleep(1.5) for d in samples: url = d.get("source_url") case = d.get("case_number") addr = d.get("address") or "" print(f"\n Testing case={case}") print(f" URL: {url}") print(f" Expected address: {addr[:60]}") try: page.goto(url, wait_until="networkidle", timeout=20_000) time.sleep(3) body = page.locator("body").inner_text() # Verify body contains the case number AND part of the expected address # Use the street number as a strong signal import re street_num_match = re.match(r"^(\d+)", addr) street_num = street_num_match.group(1) if street_num_match else None has_case = case in body has_street_num = (street_num in body) if street_num else False has_price = (str(int(d.get("listing_price") or 0)) in body.replace(",", "")) print(f" has_case#={has_case}, has_street_num={has_street_num}, has_price={has_price}") if has_case and (has_street_num or has_price): print(f" ✅ Deep-link renders the correct property") else: print(f" ❌ Deep-link does NOT render the expected property") failures += 1 except Exception as e: print(f" ❌ ERROR: {e}") failures += 1 browser.close() return failures def main(): rc = 0 rc += test_unit() deals = test_scrape_unique_urls() if deals: rc += test_http_verify(deals, n_samples=3) print() print("=" * 60) if rc == 0: print("✅ ALL TESTS PASSED") else: print(f"❌ {rc} failures") return rc if __name__ == "__main__": sys.exit(main())