feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
"""Explore HUD Homestore (hudhomestore.gov) site structure with Playwright.

Goals:
    1. Find the URL pattern for FL state results.
    2. Find the search form input names/IDs.
    3. Document which fields each listing shows in the results.
    4. Find the URL pattern for individual property detail pages.
"""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
# Force UTF-8 on stdout and replace unencodable characters, so that printing
# scraped page text cannot raise UnicodeEncodeError on consoles whose default
# encoding is narrower (e.g. a legacy Windows code page).
# reconfigure() changes the existing stream in place. Building a second
# TextIOWrapper over sys.stdout.buffer would leave two wrappers sharing one
# buffer, and would fail with AttributeError whenever stdout has been replaced
# by an object that has no ``.buffer``.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# User-agent string of a real desktop Chrome build. Federal sites often block
# non-standard UAs, so the browser context is created with this value.
REAL_UA = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36",
        "(KHTML, like Gecko)",
        "Chrome/131.0.0.0",
        "Safari/537.36",
    )
)
|
||||
|
||||
|
||||
# Entry URL used for the landing-page inspection and for the form search.
_HUD_HOME_URL = "https://www.hudhomestore.gov/"

# Pause (ms) after each navigation before the DOM is read.
_SETTLE_MS = 2_000

# Upper bound (ms) for one select_option attempt. Without it every dropdown
# that lacks the requested option would block for the page default (30 s).
_SELECT_TIMEOUT_MS = 5_000

# JavaScript run in the page: copy every attribute of an element into a plain
# object so it can be returned to Python as a dict.
_ELEMENT_ATTRS_JS = """(el) => {
    const out = {};
    for (const a of el.attributes) out[a.name] = a.value;
    return out;
}"""


def _banner(title):
    """Print ``title`` framed by two 70-character ``=`` rules."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def _status_of(response):
    """Return the HTTP status of a navigation, or ``"n/a"`` without a response.

    ``page.goto`` may return ``None`` instead of a response object, so the
    status must not be read unconditionally.
    """
    return response.status if response is not None else "n/a"


def _save_html(page, path, label):
    """Write the current page HTML to ``path`` and print its size under ``label``."""
    html = page.content()
    with open(path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f" {label}: {path} ({len(html):,} chars)")


def _dump_elements(page, selector, limit, count_options=False):
    """Print the attributes of the first ``limit`` elements matching ``selector``.

    With ``count_options=True`` the number of nested ``<option>`` elements is
    appended to each line (used for ``<select>`` dropdowns).
    """
    for index, element in enumerate(page.locator(selector).all()[:limit]):
        try:
            attrs = element.evaluate(_ELEMENT_ATTRS_JS)
            suffix = ""
            if count_options:
                suffix = f" ({element.locator('option').count()} options)"
            print(f" [{index}] {attrs}{suffix}")
        except Exception as exc:
            # Best effort: one unreadable element must not stop the survey,
            # but the failure is reported instead of being silently dropped.
            print(f" [{index}] <unreadable: {exc}>")


def _print_key_links(page):
    """Print the links among the first 80 anchors that look search-related.

    A link qualifies when its href or its visible text contains one of the
    keywords below; empty, ``javascript:`` and in-page ``#`` hrefs are skipped.
    """
    href_keywords = ("search", "result", "state", "property")
    text_keywords = ("search", "browse", "list", "view all")
    for link in page.locator("a").all()[:80]:
        try:
            href = link.get_attribute("href") or ""
            text = (link.text_content() or "").strip()[:80]
            href_lower = href.lower()
            text_lower = text.lower()
            relevant = any(kw in href_lower for kw in href_keywords) or any(
                kw in text_lower for kw in text_keywords
            )
            if not relevant or not href:
                continue
            if href_lower.startswith(("javascript:", "#")):
                continue
            print(f" href={href} | text='{text}'")
        except Exception as exc:
            print(f" <link unreadable: {exc}>")


def _probe_urls(page, urls):
    """Navigate to each candidate URL and print status, final URL and title."""
    for url in urls:
        try:
            response = page.goto(url, wait_until="domcontentloaded", timeout=15_000)
            print(
                f" {url}: status={_status_of(response)}, "
                f"final={page.url}, title={page.title()}"
            )
        except Exception as e:
            print(f" {url}: ERROR {e}")


def _select_florida(page):
    """Try to choose Florida in a dropdown on the current page.

    First tries dropdowns whose name/id mentions "State" with option value
    ``FL``; if none accepts it, tries every ``<select>`` with the option label
    ``Florida``. Returns ``True`` as soon as one selection succeeds, else
    ``False``.
    """
    state_selectors = (
        "select[name*='State']",
        "select[id*='State']",
        "select[id*='state']",
    )
    for selector in state_selectors:
        try:
            if page.locator(selector).count() > 0:
                page.locator(selector).first.select_option(
                    value="FL", timeout=_SELECT_TIMEOUT_MS
                )
                print(f" State filled with 'FL' via {selector}")
                return True
        except Exception as exc:
            print(f" Could not select value 'FL' via {selector}: {exc}")

    # Fallback: the option may be addressable only by its visible label.
    try:
        dropdowns = page.locator("select").all()
    except Exception as exc:
        print(f" Could not enumerate dropdowns: {exc}")
        return False
    for dropdown in dropdowns:
        try:
            dropdown.select_option(label="Florida", timeout=_SELECT_TIMEOUT_MS)
        except Exception:
            # Expected for every dropdown that is not the state selector.
            continue
        print(" State filled with 'Florida' label")
        return True
    return False


def _inspect_results(page):
    """Print a summary of the results page: tables, card-like nodes, detail links."""
    tables = page.locator("table").all()
    print(f" tables found: {len(tables)}")
    cards = page.locator(
        ".propertyResult, .property-result, .listing, [class*='listing']"
    ).all()
    print(f" card-like elements: {len(cards)}")

    # If the layout is table-based, show the first 3 rows of the first 3 tables.
    for i, table in enumerate(tables[:3]):
        rows = table.locator("tr").all()
        print(f" Table [{i}]: {len(rows)} rows")
        for j, row in enumerate(rows[:3]):
            cells = [
                (cell.text_content() or "").strip()[:50]
                for cell in row.locator("td, th").all()
            ]
            non_empty = [cell for cell in cells if cell]
            if non_empty:
                print(f" Row {j}: {non_empty}")

    # Links whose href hints at a per-property detail page.
    print()
    print(" Property detail links (sample):")
    detail_keywords = ("property", "case", "listing")
    for link in page.locator("a").all()[:50]:
        try:
            href = link.get_attribute("href") or ""
            href_lower = href.lower()
            if not any(kw in href_lower for kw in detail_keywords):
                continue
            if "javascript" in href_lower:
                continue
            text = (link.text_content() or "").strip()[:60]
            print(f" href={href} | text='{text}'")
        except Exception as exc:
            print(f" <link unreadable: {exc}>")


def main():
    """Explore hudhomestore.gov in headless Chromium and print the findings.

    Step 1 loads the landing page, saves its HTML to
    ``scripts/_hud_landing.html`` and lists inputs, dropdowns and
    search-related links. Step 2 probes a few guessed result/detail URLs.
    Step 3 reloads the landing page, tries to select Florida, submits the
    search form, saves the result HTML to ``scripts/_hud_results_fl.html`` and
    summarises tables, card-like elements and detail links.

    Takes no arguments and returns ``None``. Failures while submitting or
    inspecting the search are printed rather than raised; failures of the two
    landing-page navigations or of the landing-page save propagate.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent=REAL_UA,
                viewport={"width": 1400, "height": 900},
                locale="en-US",
                timezone_id="America/New_York",
            )
            page = context.new_page()
            page.set_default_timeout(30_000)

            # Step 1: load and survey the landing page.
            _banner("Step 1: landing page")
            response = page.goto(
                _HUD_HOME_URL, wait_until="networkidle", timeout=30_000
            )
            print(f" status: {_status_of(response)}")
            print(f" url: {page.url}")
            print(f" title: {page.title()}")
            # wait_for_timeout (not time.sleep) so Playwright keeps processing
            # page events while the script pauses.
            page.wait_for_timeout(_SETTLE_MS)

            _save_html(page, "scripts/_hud_landing.html", "HTML saved")

            print()
            print("--- INPUTS (first 30) ---")
            _dump_elements(page, "input", 30)

            # The state selector is likely among these.
            print()
            print("--- SELECTS ---")
            _dump_elements(page, "select", 10, count_options=True)

            print()
            print("--- KEY LINKS (search, results, etc) ---")
            _print_key_links(page)

            # Step 2: try guessed URL paths directly.
            print()
            _banner("Step 2: probe direct URLs")
            _probe_urls(
                page,
                [
                    "https://www.hudhomestore.gov/searchresult",
                    "https://www.hudhomestore.gov/SearchResult",
                    "https://www.hudhomestore.gov/Search",
                    "https://www.hudhomestore.gov/Listing/PropertyDetails",
                ],
            )

            # Step 3: search for FL through the landing-page form.
            print()
            _banner("Step 3: search FL via form")
            page.goto(_HUD_HOME_URL, wait_until="networkidle", timeout=30_000)
            page.wait_for_timeout(_SETTLE_MS)

            if not _select_florida(page):
                # The search is still submitted, but the saved results are not
                # FL-specific in that case, so say so explicitly.
                print(" WARNING: no dropdown accepted FL/Florida; submitting unfiltered search")

            try:
                page.locator(
                    "button:has-text('Search'), input[type='submit']"
                ).first.click()
                page.wait_for_load_state("networkidle", timeout=15_000)
                page.wait_for_timeout(_SETTLE_MS)
                print(f" Search submitted. URL after: {page.url}")

                _save_html(
                    page, "scripts/_hud_results_fl.html", "Results HTML saved"
                )
                _inspect_results(page)
            except Exception as e:
                print(f" Search submit ERROR: {e}")
        finally:
            # Close the browser even when a step above raised.
            browser.close()
|
||||
|
||||
|
||||
# Script entry point: run the exploration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user