"""Inspect actual content of /searchresult?state=FL.""" from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( user_agent=REAL_UA, viewport={"width": 1400, "height": 900}, locale="en-US", timezone_id="America/New_York", ) page = context.new_page() page.set_default_timeout(30_000) # Load landing first to set session page.goto("https://www.hudhomestore.gov/", wait_until="networkidle") time.sleep(2) # Then go to search page.goto("https://www.hudhomestore.gov/searchresult?state=FL", wait_until="networkidle", timeout=30_000) time.sleep(3) print("URL:", page.url) print("Title:", page.title()) print() body_text = page.locator("body").inner_text() print("--- BODY TEXT (first 3000 chars) ---") print(body_text[:3000]) # Save full HTML with open("scripts/_hud_search_fl.html", "w", encoding="utf-8") as f: f.write(page.content()) print(f"\nFull HTML saved: scripts/_hud_search_fl.html") # Dump all
with class containing 'property' or 'listing' or 'home' or 'card' print() print("--- LISTING DIVS ---") for cls in ["[class*='property']", "[class*='listing']", "[class*='home-card']", "[class*='card']", "[class*='result']", "[class*='item']"]: els = page.locator(f"div{cls}").all() if els: print(f"\n div{cls}: {len(els)} elements") for i, el in enumerate(els[:3]): txt = (el.text_content() or "").strip()[:300] if txt: print(f" [{i}] {txt!r}") # Try links to detail pages print() print("--- LINKS TO POTENTIAL DETAIL PAGES ---") for link in page.locator("a").all()[:60]: try: href = link.get_attribute("href") or "" text = (link.text_content() or "").strip()[:80] if ("propertydetails" in href.lower() or "casenumber" in href.lower() or "listing" in href.lower() or "?case" in href.lower() or "property-details" in href.lower()): if "javascript" not in href: print(f" href={href} | text='{text}'") except Exception: pass browser.close()