feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,71 @@
|
||||
"""Probe HUD Homestore /searchresult with direct query parameters."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Desktop Chrome user-agent string; passed as `user_agent` when the browser
# context is created so the probe does not identify as a headless client.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)

# Candidate /searchresult URLs. Each entry tries a different spelling of the
# query parameter (name, casing, value form); the probe visits them in order.
URLS_TO_TRY = [
    "https://www.hudhomestore.gov/searchresult?state=FL",
    "https://www.hudhomestore.gov/searchresult?searchText=FL",
    "https://www.hudhomestore.gov/searchresult?st=FL",
    "https://www.hudhomestore.gov/searchresult?CityStateZip=FL",
    "https://www.hudhomestore.gov/searchresult?cityStateZip=FL",
    "https://www.hudhomestore.gov/searchresult?state=Florida",
    "https://www.hudhomestore.gov/searchresult?State=FL&sortBy=0",
]
|
||||
|
||||
# Substrings (matched against the lower-cased body text) that suggest the page
# shows search results rather than the landing page.
RESULT_KEYWORDS = (
    "result(s)", "of property", "of properties", "$", "sale price", "list price",
)

# Substrings that suggest the search ran but matched nothing.
NO_RESULT_KEYWORDS = ("no result", "no properties", "no homes", "no match")

# Selector used to count listing-like containers.
COUNT_SELECTOR = "div[class*='listing'], div[class*='property'], div[class*='result']"

# Selector used to dump a sample of listing-like containers.
LISTING_SELECTOR = "div[class*='property'], div[class*='listing']"


def classify_body(body):
    """Return ``(has_results_text, no_results)`` for a page's visible text.

    Both flags are plain case-insensitive substring checks against
    RESULT_KEYWORDS and NO_RESULT_KEYWORDS respectively.
    """
    lowered = body.lower()  # lower-case once instead of once per keyword
    has_results_text = any(kw in lowered for kw in RESULT_KEYWORDS)
    no_results = any(kw in lowered for kw in NO_RESULT_KEYWORDS)
    return has_results_text, no_results


def slug_for_url(url):
    """Return a file-name-safe slug taken from the text after the last ``=``.

    Characters other than letters, digits, ``-`` and ``_`` are replaced with
    ``_`` so the slug can never introduce a path separator. An empty value
    falls back to ``"search"``.
    """
    raw = url.split("=")[-1]
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_") else "_" for ch in raw)
    return cleaned or "search"


def save_snapshot(html, url, out_dir="scripts"):
    """Write *html* to ``<out_dir>/_hud_search_<slug>.html`` and return the path.

    The output directory is created when missing, so a detected hit is not
    lost just because the script was started from another working directory.
    """
    import os  # local import: this block adds no file-level dependency

    path = f"{out_dir}/_hud_search_{slug_for_url(url)}.html"
    os.makedirs(out_dir, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(html)
    return path


def probe_url(page, url):
    """Load *url* in *page*, print diagnostics, and report whether it has results.

    *page* is the Playwright page created in ``main``. Returns True when the
    body text matches a result keyword and no "no results" keyword; in that
    case the HTML is saved and up to five listing-like elements are printed.
    Exceptions raised by navigation or file I/O propagate to the caller.
    """
    response = page.goto(url, wait_until="networkidle", timeout=20_000)
    time.sleep(2)
    # Page.goto is documented to return None for some navigations; do not
    # dereference it blindly.
    status = response.status if response is not None else None
    print(f" status={status}, final={page.url}")

    # Check for results using common indicators of results vs landing.
    body = page.locator("body").inner_text()
    has_results_text, no_results = classify_body(body)
    print(f" has_results_text: {has_results_text}, no_results: {no_results}")

    # Count tables and property-looking elements.
    tables = page.locator("table").count()
    divs_listing = page.locator(COUNT_SELECTOR).count()
    print(f" tables: {tables}, listing divs: {divs_listing}")

    if not has_results_text or no_results:
        return False

    print(" → LIKELY HAS RESULTS")
    saved_path = save_snapshot(page.content(), url)
    print(f" saved: {saved_path}")

    # Dump first 5 listing-like elements (the loop is empty when none match).
    for i, el in enumerate(page.locator(LISTING_SELECTOR).all()[:5]):
        txt = (el.text_content() or "").strip()[:200]
        print(f" [{i}] {txt!r}")
    return True


def main():
    """Visit the landing page, then try each candidate URL until one has results."""
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent=REAL_UA,
                viewport={"width": 1400, "height": 900},
                locale="en-US",
                timezone_id="America/New_York",
            )
            page = context.new_page()
            page.set_default_timeout(30_000)

            # FIRST: load landing to set cookies / session
            page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
            time.sleep(2)

            for url in URLS_TO_TRY:
                print(f"\n=== {url} ===")
                try:
                    if probe_url(page, url):
                        break
                except Exception as e:
                    # Best-effort probe: report the failure and try the next URL.
                    print(f" ERROR: {e}")
        finally:
            # Close the browser even when the landing-page load fails.
            browser.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user