feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
"""Find the deep-link URL pattern for HUD property detail pages."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time, re
|
||||
# Force UTF-8 output with unencodable characters replaced, so printing scraped
# page text cannot raise UnicodeEncodeError on a console with a narrower
# default encoding. reconfigure() (Python 3.7+) adjusts the existing stream in
# place, which keeps its buffering mode and avoids stacking a second text
# wrapper on the same underlying buffer.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    # Fallback for stream objects that expose a binary buffer but cannot be
    # reconfigured: wrap the buffer in a new UTF-8 text layer.
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer, encoding="utf-8", errors="replace"
    )
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Desktop Chrome-on-Windows user-agent string handed to the browser context
# below (presumably so the headless browser does not present its default
# user agent to the site -- confirm whether the site actually requires this).
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
|
||||
|
||||
# Caps on how much of each probe's output is printed, so the report stays
# readable even when the page contains many matches.
CARD_HTML_PREVIEW_CHARS = 2500
MAX_ANCHORS = 15
MAX_CLICKABLES = 10
MAX_DETAIL_URLS = 10
MAX_CASE_LINKS = 8

# Probe the HUD Home Store search results page and print everything that could
# reveal the deep-link URL pattern of a property detail page: the first card's
# markup, its <a> hrefs, its onclick/data-* attributes, and any matching
# href/data attributes found in the full page HTML.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        page = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1400, "height": 900},
        ).new_page()
        page.set_default_timeout(30_000)

        # Visit the landing page before the search URL -- presumably so the
        # session is established first; TODO confirm this step is required.
        page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
        # Fixed pauses after network idle give the page extra time to settle.
        # page.wait_for_timeout is used instead of time.sleep because
        # Playwright's sync API recommends its own wait over blocking sleeps.
        page.wait_for_timeout(2_000)
        page.goto(
            "https://www.hudhomestore.gov/searchresult?citystate=FL",
            wait_until="networkidle",
        )
        page.wait_for_timeout(6_000)

        # Find all links inside cards
        print("=== ALL <a> elements inside property cards ===")
        cards = page.locator("div.topMap-card.card-body").all()
        print(f"Found {len(cards)} cards")

        # Inspect the FIRST card in detail
        if cards:
            first_card = cards[0]
            print()
            # The label is derived from the same constant as the slice so the
            # two cannot drift apart.
            print(
                f"--- CARD #1 HTML structure "
                f"(first {CARD_HTML_PREVIEW_CHARS} chars) ---"
            )
            card_html = first_card.evaluate("(el) => el.outerHTML")
            # Collapse every whitespace run to one space, then truncate.
            print(re.sub(r"\s+", " ", card_html)[:CARD_HTML_PREVIEW_CHARS])

            print()
            print("--- CARD #1 ALL <a> hrefs ---")
            anchors = first_card.locator("a").all()
            for i, anchor in enumerate(anchors[:MAX_ANCHORS]):
                href = anchor.get_attribute("href") or ""
                text = (anchor.text_content() or "").strip()[:60]
                print(f" [{i}] href={href} | text='{text}'")

            # Also look for onclick handlers + data attributes
            print()
            print("--- CARD #1 elements with onclick / data-* ---")
            clickables = first_card.locator(
                "[onclick], [data-href], [data-url], [data-link], [data-property]"
            ).all()
            for el in clickables[:MAX_CLICKABLES]:
                # One in-page call returns both the tag name and a
                # name -> value map of all attributes on the element.
                tag, attrs = el.evaluate(
                    """(node) => {
                        const out = {};
                        for (const a of node.attributes) out[a.name] = a.value;
                        return [node.tagName, out];
                    }"""
                )
                print(f" {tag}: {attrs}")

        # Check if there's a global pattern for property detail URLs in the page
        print()
        print("=== Looking for '/propertydetails' / '/Listing' anywhere in page ===")
        full_html = page.content()

        # Find href patterns. The match is case-insensitive, so these two
        # alternatives cover every casing (and any "listing/PropertyDetail"
        # path, which contains "propertydetail").
        urls = re.findall(
            r'href="([^"]*(?:propertydetail|casenumber)[^"]*)"',
            full_html,
            re.IGNORECASE,
        )
        # De-duplicate BEFORE truncating, keeping first-seen order, so repeated
        # links cannot crowd out distinct ones and the output order is stable.
        for u in list(dict.fromkeys(urls))[:MAX_DETAIL_URLS]:
            print(f" {u}")

        # Also look for data attribs with case#: any href or data-* value that
        # contains "093", an optional hyphen, and six digits.
        case_links = re.findall(
            r'(href|data-[a-z]+)="([^"]*093-?\d{6}[^"]*)"',
            full_html,
            re.IGNORECASE,
        )
        print("\nLinks containing a case number (093-XXXXXX):")
        for attr, url in list(dict.fromkeys(case_links))[:MAX_CASE_LINKS]:
            print(f" {attr}={url}")
    finally:
        # Close the browser even if navigation or a locator call raises.
        browser.close()
|
||||
Reference in New Issue
Block a user