# AR-House/scripts/explore_hud_searchresult.py

"""Probe HUD Homestore /searchresult with direct query parameters."""
from __future__ import annotations
import io, sys, time
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

from playwright.sync_api import sync_playwright

# User-agent string passed to the browser context (desktop Chrome 131 on
# 64-bit Windows 10).
REAL_UA = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/131.0.0.0",
    "Safari/537.36",
))

# Candidate /searchresult URLs to probe, in order. Each one guesses a
# different query-parameter name (or value spelling) for a Florida search.
_SEARCH_ENDPOINT = "https://www.hudhomestore.gov/searchresult"
URLS_TO_TRY = [
    f"{_SEARCH_ENDPOINT}?{query}"
    for query in (
        "state=FL",
        "searchText=FL",
        "st=FL",
        "CityStateZip=FL",
        "cityStateZip=FL",
        "state=Florida",
        "State=FL&sortBy=0",
    )
]

# Lower-case substrings whose presence in the page text is taken as a hint
# that listings were rendered.
# NOTE(review): "$" is a very weak signal and may also match non-result
# pages; kept as-is to preserve the existing heuristic.
RESULT_KEYWORDS = (
    "result(s)", "of property", "of properties", "$", "sale price", "list price",
)

# Lower-case substrings taken as an explicit "nothing found" message.
NO_RESULT_KEYWORDS = ("no result", "no properties", "no homes", "no match")


def classify_body_text(body: str) -> tuple[bool, bool]:
    """Classify visible page text with the result / no-result keyword lists.

    Args:
        body: Visible text of the page (any casing).

    Returns:
        ``(has_results_text, no_results)``: whether any result keyword and
        whether any no-result keyword occurs in *body*, case-insensitively.
    """
    lowered = body.lower()  # lower-case once instead of once per keyword
    has_results_text = any(kw in lowered for kw in RESULT_KEYWORDS)
    no_results = any(kw in lowered for kw in NO_RESULT_KEYWORDS)
    return has_results_text, no_results


def slug_for_url(url: str) -> str:
    """Return the text after the last ``=`` in *url*, or ``"search"`` if empty.

    Used to name the saved HTML snapshot.
    """
    return url.split("=")[-1] or "search"


def main() -> None:
    """Probe each candidate URL and snapshot the first one that looks like results.

    For every URL in ``URLS_TO_TRY`` this prints the HTTP status, the final
    URL, the keyword classification and some element counts. On the first
    URL whose text has result keywords and no no-result keywords, the page
    HTML is written next to this script, up to five listing-like elements
    are printed, and probing stops. Errors on one URL are printed and the
    next URL is tried.
    """
    from pathlib import Path  # local import keeps this block self-contained

    # Resolve against this script's directory rather than the current working
    # directory, so the save works no matter where the script is run from.
    out_dir = Path(__file__).resolve().parent

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent=REAL_UA, viewport={"width": 1400, "height": 900},
                locale="en-US", timezone_id="America/New_York",
            )
            page = context.new_page()
            page.set_default_timeout(30_000)

            # Load the landing page first; intended to establish cookies /
            # session state before hitting /searchresult directly.
            page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
            # Use Playwright's own wait instead of time.sleep() so its
            # internal event processing keeps running during the pause.
            page.wait_for_timeout(2_000)

            for url in URLS_TO_TRY:
                print(f"\n=== {url} ===")
                try:
                    response = page.goto(url, wait_until="networkidle", timeout=20_000)
                    page.wait_for_timeout(2_000)
                    # goto() can return None (e.g. same-document navigation),
                    # so do not assume a response object exists.
                    status = response.status if response is not None else None
                    print(f"  status={status}, final={page.url}")

                    # Keyword heuristics on the visible text.
                    body = page.locator("body").inner_text()
                    has_results_text, no_results = classify_body_text(body)
                    print(f"  has_results_text: {has_results_text}, no_results: {no_results}")

                    # Count tables and property-looking elements.
                    tables = page.locator("table").count()
                    divs_listing = page.locator(
                        "div[class*='listing'], div[class*='property'], div[class*='result']"
                    ).count()
                    print(f"  tables: {tables}, listing divs: {divs_listing}")

                    if has_results_text and not no_results:
                        print("  → LIKELY HAS RESULTS")
                        # Save the rendered HTML for offline inspection.
                        out_path = out_dir / f"_hud_search_{slug_for_url(url)}.html"
                        out_path.write_text(page.content(), encoding="utf-8")
                        print(f"  saved: {out_path}")
                        # Dump the first 5 listing-like elements.
                        if divs_listing > 0:
                            candidates = page.locator(
                                "div[class*='property'], div[class*='listing']"
                            ).all()[:5]
                            for i, el in enumerate(candidates):
                                txt = (el.text_content() or "").strip()[:200]
                                print(f"    [{i}] {txt!r}")
                        break
                except Exception as e:
                    # Best-effort probe: report the failure and try the next URL.
                    print(f"  ERROR: {e}")
        finally:
            # Close the browser even if the landing-page load or the loop raises.
            browser.close()


if __name__ == "__main__":
    main()