Files
AR-House/scripts/explore_hud_api_intercept.py
T
2026-07-03 12:24:58 -04:00

143 lines
5.1 KiB
Python

"""Intercept network calls during HUD search to find the JSON API."""
from __future__ import annotations
import io, sys, time, json
# Re-wrap stdout so everything printed below is encoded as UTF-8, with
# characters that cannot be encoded replaced instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
from playwright.sync_api import sync_playwright
# User-Agent string handed to the browser context; it identifies the client
# as desktop Chrome 131 on 64-bit Windows 10.
REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
# Substrings that mark a URL as worth recording. Matching is done against the
# lower-cased URL, so it is case-insensitive.
_URL_KEYWORDS = ("api", "search", "property", "listing", "result", "auto")

# Summaries appended by the page event handlers below, in arrival order.
captured_requests = []
captured_responses = []


def _is_interesting(url):
    """Return True when *url* contains any of ``_URL_KEYWORDS``."""
    lowered = url.lower()
    return any(keyword in lowered for keyword in _URL_KEYWORDS)


def on_request(req):
    """Record a keyword-matching request in ``captured_requests``.

    Args:
        req: Request-like object exposing ``url``, ``method``, ``headers``
            and ``post_data``.

    Requests whose URL matches no keyword are ignored.
    """
    url = req.url
    if not _is_interesting(url):
        return
    captured_requests.append({
        "method": req.method,
        "url": url,
        "headers": dict(req.headers),
        "post_data": req.post_data,
    })


def on_response(resp):
    """Record a keyword-matching response in ``captured_responses``.

    Args:
        resp: Response-like object exposing ``url``, ``status``, ``headers``
            and the ``json()`` / ``body()`` methods.

    A response whose content type mentions "json" is stored with a
    500-character preview of its re-serialised body and its top-level keys
    (or the body's type name when it is not a dict). Any other response is
    stored with its body size, but only when its status is in [200, 400).
    If reading the body raises, the response is skipped and a warning is
    written to stderr instead of the failure being hidden.
    """
    url = resp.url
    if not _is_interesting(url):
        return
    try:
        ct = resp.headers.get("content-type", "")
        if "json" in ct:
            body = resp.json()
            entry = {
                "url": url,
                "status": resp.status,
                "content_type": ct,
                "body_preview": json.dumps(body)[:500] if body else "",
                "body_keys": list(body.keys()) if isinstance(body, dict) else type(body).__name__,
            }
        elif 200 <= resp.status < 400:
            entry = {
                "url": url,
                "status": resp.status,
                "content_type": ct,
                "body_size": len(resp.body() or b""),
            }
        else:
            return
    except Exception as exc:
        # Event-callback boundary: stay best-effort (never abort the page
        # run) but say what was dropped so a missing endpoint is explainable.
        print(f" [warn] skipped response {url[:120]}: {exc}", file=sys.stderr)
        return
    captured_responses.append(entry)
# Imported next to its only use; the file already depends on
# playwright.sync_api.
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError


def _click_item(page, item):
    """Force-click *item*; if that raises, click it from inside the page instead."""
    try:
        item.click(force=True)
    except Exception as exc:
        print(f" click failed: {exc}, trying evaluate")
        page.evaluate("(el) => el.click()", item.element_handle())


def _print_capture_report():
    """Print the first 30 captured requests and the first 20 captured responses."""
    print("=" * 60)
    print("CAPTURED REQUESTS")
    print("=" * 60)
    for req in captured_requests[:30]:
        url_short = req["url"][:150]
        print(f" {req['method']} {url_short}")
        if req.get("post_data"):
            print(f" post_data: {req['post_data'][:200]}")
    print()
    print("=" * 60)
    # The list also holds non-JSON responses (recorded with a body size), so
    # the heading no longer claims "JSON only".
    print("CAPTURED RESPONSES")
    print("=" * 60)
    for resp in captured_responses[:20]:
        url_short = resp["url"][:120]
        print(f"\n {resp['status']} {url_short}")
        print(f" ct: {resp['content_type']}")
        if resp.get("body_keys"):
            print(f" body_keys: {resp['body_keys']}")
        if resp.get("body_preview"):
            print(f" preview: {resp['body_preview']}")
        if "body_size" in resp:
            print(f" body_size: {resp['body_size']}")


with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA, viewport={"width": 1400, "height": 900},
            locale="en-US", timezone_id="America/New_York",
        )
        page = context.new_page()
        page.on("request", on_request)
        page.on("response", on_response)
        page.set_default_timeout(30_000)

        # 1. Load landing
        print("[1] Loading landing...")
        page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
        time.sleep(2)

        # 2. Type "FL" via JavaScript directly (label intercepts pointer events)
        print("[2] Setting cityStateZip value via JS + triggering oninput...")
        input_found = page.evaluate("""() => {
            const inp = document.getElementById('cityStateZip');
            if (!inp) {
                return false;
            }
            inp.value = 'FL';
            // Trigger the oninput handler manually
            const event = new Event('input', { bubbles: true });
            inp.dispatchEvent(event);
            // Also call the explicit ysi handler if exists
            if (typeof ysi !== 'undefined' && ysi.corpsearchfilter) {
                ysi.corpsearchfilter.changeInput('FL', 'home');
            }
            return true;
        }""")
        if not input_found:
            print(" cityStateZip input not found; autocomplete was not triggered")
        time.sleep(3)  # Wait for autocomplete

        # 3. Inspect autocomplete options
        print("[3] Inspecting autocomplete dropdown...")
        autocomplete_items = page.locator("#cityStateZipautocomplete-list li, [role='option']").all()
        print(f" {len(autocomplete_items)} autocomplete items")
        for i, item in enumerate(autocomplete_items[:10]):
            text = (item.text_content() or "").strip()[:80]
            print(f" [{i}] {text!r}")

        # 4. Click "Florida" option, or the first suggestion when none matches
        if autocomplete_items:
            for item in autocomplete_items:
                text = (item.text_content() or "").lower()
                if "florida" in text:
                    print(f"[4] Clicking Florida option: {text[:60]}")
                    _click_item(page, item)
                    break
            else:
                # fallback: click first
                print("[4] No Florida match found; clicking first item")
                _click_item(page, autocomplete_items[0])
            # Give the click time to start the search before waiting on the network.
            time.sleep(5)

        # 5. Wait for search results to populate
        try:
            page.wait_for_load_state("networkidle", timeout=15_000)
        except PlaywrightTimeoutError:
            # A page that keeps polling never goes idle; what has been
            # captured so far is still worth reporting.
            print(" network did not go idle within 15s; continuing")
        time.sleep(3)
        print()
        print("[5] After search — final URL:", page.url)
        print()
    finally:
        # Show all captured API endpoints, even when a step above raised, so
        # a failed run still reports what it saw before the browser closes.
        _print_capture_report()
        browser.close()