feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
"""Inspect actual content of /searchresult?state=FL."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=True)
|
||||
context = browser.new_context(
|
||||
user_agent=REAL_UA, viewport={"width": 1400, "height": 900},
|
||||
locale="en-US", timezone_id="America/New_York",
|
||||
)
|
||||
page = context.new_page()
|
||||
page.set_default_timeout(30_000)
|
||||
|
||||
# Load landing first to set session
|
||||
page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
|
||||
time.sleep(2)
|
||||
|
||||
# Then go to search
|
||||
page.goto("https://www.hudhomestore.gov/searchresult?state=FL", wait_until="networkidle", timeout=30_000)
|
||||
time.sleep(3)
|
||||
|
||||
print("URL:", page.url)
|
||||
print("Title:", page.title())
|
||||
print()
|
||||
|
||||
body_text = page.locator("body").inner_text()
|
||||
print("--- BODY TEXT (first 3000 chars) ---")
|
||||
print(body_text[:3000])
|
||||
|
||||
# Save full HTML
|
||||
with open("scripts/_hud_search_fl.html", "w", encoding="utf-8") as f:
|
||||
f.write(page.content())
|
||||
print(f"\nFull HTML saved: scripts/_hud_search_fl.html")
|
||||
|
||||
# Dump all <div> with class containing 'property' or 'listing' or 'home' or 'card'
|
||||
print()
|
||||
print("--- LISTING DIVS ---")
|
||||
for cls in ["[class*='property']", "[class*='listing']", "[class*='home-card']",
|
||||
"[class*='card']", "[class*='result']", "[class*='item']"]:
|
||||
els = page.locator(f"div{cls}").all()
|
||||
if els:
|
||||
print(f"\n div{cls}: {len(els)} elements")
|
||||
for i, el in enumerate(els[:3]):
|
||||
txt = (el.text_content() or "").strip()[:300]
|
||||
if txt:
|
||||
print(f" [{i}] {txt!r}")
|
||||
|
||||
# Try links to detail pages
|
||||
print()
|
||||
print("--- LINKS TO POTENTIAL DETAIL PAGES ---")
|
||||
for link in page.locator("a").all()[:60]:
|
||||
try:
|
||||
href = link.get_attribute("href") or ""
|
||||
text = (link.text_content() or "").strip()[:80]
|
||||
if ("propertydetails" in href.lower() or "casenumber" in href.lower()
|
||||
or "listing" in href.lower() or "?case" in href.lower()
|
||||
or "property-details" in href.lower()):
|
||||
if "javascript" not in href:
|
||||
print(f" href={href} | text='{text}'")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
browser.close()
|
||||
Reference in New Issue
Block a user