"""Explore HUD Homestore (hudhomestore.gov) site structure with Playwright. Goals: 1. Find URL pattern for FL state results 2. Find the search form input names/IDs 3. Document what fields each listing shows in results 4. Find URL pattern for individual property detail pages """ from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright # Real Chrome UA — federal sites often block non-standard UAs REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") def main(): with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( user_agent=REAL_UA, viewport={"width": 1400, "height": 900}, locale="en-US", timezone_id="America/New_York", ) page = context.new_page() page.set_default_timeout(30_000) # Step 1: Load landing page print("=" * 70) print("Step 1: landing page") print("=" * 70) response = page.goto("https://www.hudhomestore.gov/", wait_until="networkidle", timeout=30_000) print(f" status: {response.status}") print(f" url: {page.url}") print(f" title: {page.title()}") time.sleep(2) # Save landing HTML landing_html = page.content() with open("scripts/_hud_landing.html", "w", encoding="utf-8") as f: f.write(landing_html) print(f" HTML saved: scripts/_hud_landing.html ({len(landing_html):,} chars)") # Inspect form inputs print() print("--- INPUTS (first 30) ---") for i, inp in enumerate(page.locator("input").all()[:30]): try: attrs = page.evaluate("""(el) => { const out = {}; for (const a of el.attributes) out[a.name] = a.value; return out; }""", inp.element_handle()) print(f" [{i}] {attrs}") except Exception: pass # Inspect select dropdowns (state selector likely) print() print("--- SELECTS ---") for i, sel in enumerate(page.locator("select").all()[:10]): try: attrs = page.evaluate("""(el) => { const out = {}; for (const a of el.attributes) out[a.name] = a.value; return out; }""", sel.element_handle()) opts_count = sel.locator("option").count() print(f" [{i}] {attrs} ({opts_count} options)") except Exception: pass # Inspect navigation links print() print("--- KEY LINKS (search, results, etc) ---") for link in page.locator("a").all()[:80]: try: href = link.get_attribute("href") or "" text = (link.text_content() or "").strip()[:80] if any(kw in href.lower() for kw in ["search", "result", "state", "property"]) or \ any(kw in text.lower() for kw in ["search", "browse", "list", "view all"]): if href and not href.startswith("javascript:") and not href.startswith("#"): print(f" href={href} | text='{text}'") except Exception: pass # Step 2: try direct URL paths print() print("=" * 70) print("Step 2: probe direct URLs") print("=" * 70) urls_to_try = [ "https://www.hudhomestore.gov/searchresult", "https://www.hudhomestore.gov/SearchResult", "https://www.hudhomestore.gov/Search", "https://www.hudhomestore.gov/Listing/PropertyDetails", ] for url in urls_to_try: try: r = page.goto(url, wait_until="domcontentloaded", timeout=15_000) print(f" {url}: status={r.status}, final={page.url}, title={page.title()}") except Exception as e: print(f" {url}: ERROR {e}") # Step 3: Try searching with FL state via search form print() print("=" * 70) print("Step 3: search FL via form") print("=" * 70) page.goto("https://www.hudhomestore.gov/", wait_until="networkidle", timeout=30_000) time.sleep(2) # Try filling state select with FL state_filled = False for sel in ["select[name*='State']", "select[id*='State']", "select[id*='state']"]: try: if page.locator(sel).count() > 0: page.locator(sel).first.select_option(value="FL") state_filled = True print(f" State filled with 'FL' via {sel}") break except Exception as e: pass if not state_filled: # Try option label "Florida" for sel in ["select"]: try: selects = page.locator(sel).all() for s in selects: try: s.select_option(label="Florida") state_filled = True print(f" State filled with 'Florida' label") break except Exception: continue if state_filled: break except Exception: pass # Now try submit search try: page.locator("button:has-text('Search'), input[type='submit']").first.click() page.wait_for_load_state("networkidle", timeout=15_000) time.sleep(2) print(f" Search submitted. URL after: {page.url}") results_html = page.content() with open("scripts/_hud_results_fl.html", "w", encoding="utf-8") as f: f.write(results_html) print(f" Results HTML saved: scripts/_hud_results_fl.html ({len(results_html):,} chars)") # Inspect what came back — tables or cards? tables = page.locator("table").all() print(f" tables found: {len(tables)}") cards = page.locator(".propertyResult, .property-result, .listing, [class*='listing']").all() print(f" card-like elements: {len(cards)}") # If table-based, dump first 3 rows of each table for i, t in enumerate(tables[:3]): rows = t.locator("tr").all() print(f" Table [{i}]: {len(rows)} rows") for j, r in enumerate(rows[:3]): cells = [(c.text_content() or "").strip()[:50] for c in r.locator("td, th").all()] non_empty = [c for c in cells if c] if non_empty: print(f" Row {j}: {non_empty}") # Look for links to property detail print() print(" Property detail links (sample):") for link in page.locator("a").all()[:50]: try: href = link.get_attribute("href") or "" if "property" in href.lower() or "case" in href.lower() or "listing" in href.lower(): if "javascript" not in href: text = (link.text_content() or "").strip()[:60] print(f" href={href} | text='{text}'") except Exception: pass except Exception as e: print(f" Search submit ERROR: {e}") browser.close() if __name__ == "__main__": main()