feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
"""Probe the discovered URL pattern ?citystate=FL."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
# Re-wrap stdout as UTF-8 and substitute any character that cannot be encoded,
# so printing scraped page text does not raise an encoding error
# (presumably needed on consoles whose default codec is not UTF-8 -- confirm).
sys.stdout = io.TextIOWrapper(
    sys.stdout.buffer,
    encoding="utf-8",
    errors="replace",
)
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# User-Agent string handed to the browser context; it is a desktop
# Chrome-on-Windows UA written as two adjacent literals that concatenate
# into a single string.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
|
||||
|
||||
# Probe run: open headless Chromium, load the landing page and then the
# ?citystate=FL results page, and dump what rendered (body text, counts for
# candidate listing selectors, the full HTML, and samples of likely card
# elements) so the page structure can be inspected by hand.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1400, "height": 900},
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = context.new_page()
        page.set_default_timeout(30_000)

        # First load landing (set cookies)
        page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
        # Pause via Playwright rather than time.sleep(): the sync API needs its
        # own wait so internal events keep being processed during the pause.
        page.wait_for_timeout(2_000)

        # Then load FL results
        print("Loading citystate=FL...")
        page.goto(
            "https://www.hudhomestore.gov/searchresult?citystate=FL",
            wait_until="networkidle",
            timeout=30_000,
        )
        print(f"URL: {page.url}")
        print(f"Title: {page.title()}")

        # Wait longer for SPA to render property cards
        page.wait_for_timeout(6_000)

        # Check body for results count
        body_text = page.locator("body").inner_text()
        print()
        print("--- BODY TEXT (first 1500) ---")
        print(body_text[:1500])

        # Count property cards / listings; only selectors that match are shown.
        print()
        print("--- LISTING SELECTORS ---")
        listing_selectors = (
            "[class*='property-card']",
            "[class*='propertyCard']",
            "[class*='listing-item']",
            "[class*='listing-card']",
            "[class*='home-card']",
            "[class*='result-item']",
            "[data-property]",
            "article",
            ".property",
            ".listing",
            "[id*='property-list']",
        )
        for sel in listing_selectors:
            n = page.locator(sel).count()
            if n > 0:
                print(f" {sel}: {n}")

        # Save full HTML (path is relative to the current working directory).
        html = page.content()
        with open("scripts/_hud_citystate_fl.html", "w", encoding="utf-8") as f:
            f.write(html)
        print(f"\nHTML saved: scripts/_hud_citystate_fl.html ({len(html):,} chars)")

        # Try common property card class patterns from Yardi
        candidates = page.locator(
            "li[id^='property'], div[id^='property'], li[class*='listing'], li[class*='card']"
        ).all()
        print(f"\nPropertycards via id^=property selector: {len(candidates)}")
        for i, el in enumerate(candidates[:5]):
            # Show at most 300 characters of text for the first five matches.
            txt = (el.text_content() or "").strip()[:300]
            print(f" [{i}] {txt!r}")

        # Try article elements: print each one's attributes and leading text.
        articles = page.locator("article").all()
        print(f"\nArticles: {len(articles)}")
        for i, a in enumerate(articles[:5]):
            txt = (a.text_content() or "").strip()[:300]
            # Collect every DOM attribute of the element into a plain dict.
            attrs = page.evaluate(
                "(el) => { const o={}; for(const a of el.attributes) o[a.name]=a.value; return o; }",
                a.element_handle(),
            )
            print(f" [{i}] {attrs}")
            print(f" text: {txt!r}")
    finally:
        # Always release the browser, including when a navigation, selector
        # query, or the HTML write above raises.
        browser.close()
|
||||
Reference in New Issue
Block a user