143 lines
5.1 KiB
Python
143 lines
5.1 KiB
Python
"""Intercept network calls during HUD search to find the JSON API."""
|
|
from __future__ import annotations
|
|
import io, sys, time, json
|
|
# Force UTF-8 output (unencodable characters are replaced) so that printing
# scraped page text cannot raise UnicodeEncodeError on a console that uses a
# legacy code page. reconfigure() adjusts the existing stream in place and
# keeps its current buffering mode; wrapping sys.stdout.buffer in a brand-new
# io.TextIOWrapper would default to line_buffering=False and hold the progress
# messages back while the script sleeps.
if hasattr(sys.stdout, "reconfigure"):  # missing when stdout is not a TextIOWrapper
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# Desktop Chrome user-agent string; the script passes it as `user_agent` when
# it creates the browser context.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
|
|
|
|
# Records appended by the two event handlers below, in arrival order.
captured_requests = []
captured_responses = []

# Substrings that mark a URL as a candidate API call. Defined once so the
# request handler and the response handler always filter identically.
_URL_KEYWORDS = ("api", "search", "property", "listing", "result", "auto")


def _is_interesting(url):
    """Return True when *url* contains any ``_URL_KEYWORDS`` entry (case-insensitive)."""
    lowered = url.lower()
    return any(keyword in lowered for keyword in _URL_KEYWORDS)


def on_request(req):
    """Record an outgoing request whose URL looks API-related.

    Args:
        req: Request object delivered by the page's "request" event. Only its
            ``url``, ``method``, ``headers`` and ``post_data`` attributes are
            read.

    Appends a dict with the keys ``method``, ``url``, ``headers`` and
    ``post_data`` to ``captured_requests``; URLs that match no keyword are
    ignored.
    """
    url = req.url
    if not _is_interesting(url):
        return

    try:
        post_data = req.post_data
    except UnicodeDecodeError:
        # NOTE(review): presumably post_data is decoded as text on access, so a
        # binary upload can fail here — confirm against the Playwright version
        # in use. Keep the request and flag the body rather than lose it.
        post_data = "<binary post data>"

    captured_requests.append({
        "method": req.method,
        "url": url,
        "headers": dict(req.headers),
        "post_data": post_data,
    })


def on_response(resp):
    """Record a response whose URL looks API-related.

    Responses whose content type mentions JSON are stored with a preview of at
    most 500 characters (``body_preview``) and either their top-level keys or,
    for non-dict payloads, the payload's type name (``body_keys``). Any other
    response is stored with its ``body_size`` in bytes, but only when its
    status is in the range 200-399. A response whose body cannot be read or
    parsed is still recorded, with the failure text under ``error``, instead
    of being dropped silently.

    Args:
        resp: Response object delivered by the page's "response" event. Its
            ``url``, ``status`` and ``headers`` attributes are read, plus
            either ``json()`` or ``body()``.
    """
    url = resp.url
    if not _is_interesting(url):
        return

    status = resp.status
    content_type = resp.headers.get("content-type", "")
    is_json = "json" in content_type.lower()
    if not is_json and not 200 <= status < 400:
        return

    entry = {"url": url, "status": status, "content_type": content_type}
    try:
        if is_json:
            body = resp.json()
            entry["body_preview"] = json.dumps(body)[:500] if body else ""
            entry["body_keys"] = (
                list(body.keys()) if isinstance(body, dict) else type(body).__name__
            )
        else:
            entry["body_size"] = len(resp.body() or b"")
    except Exception as exc:
        # Broad on purpose: the failure can come from the browser driver or
        # from the JSON parser, and one unreadable body must not abort the
        # capture. The response is kept and the reason is stored with it.
        entry["error"] = f"{type(exc).__name__}: {exc}"
    captured_responses.append(entry)
|
|
|
|
|
|
def _click_option(page, option):
    """Click an autocomplete *option*, falling back to a DOM-level click.

    A forced Playwright click is tried first; if it raises, the element's own
    ``click()`` is invoked through ``page.evaluate`` instead.
    """
    try:
        option.click(force=True)
    except Exception as e:
        print(f" click failed: {e}, trying evaluate")
        page.evaluate("(el) => el.click()", option.element_handle())


def _drive_search(page):
    """Load the landing page, enter "FL" and pick an autocomplete option.

    Progress is printed step by step. Errors raised by the navigation or by
    the injected JavaScript propagate to the caller; only the final
    network-idle wait is allowed to time out.
    """
    # Function-scope import: only this helper needs the timeout class.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    # 1. Load landing
    print("[1] Loading landing...")
    page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
    time.sleep(2)

    # 2. Type "FL" via JavaScript directly (label intercepts pointer events)
    print("[2] Setting cityStateZip value via JS + triggering oninput...")
    page.evaluate("""() => {
        const inp = document.getElementById('cityStateZip');
        if (!inp) {
            // Fail with a readable message instead of a null-property TypeError
            throw new Error('cityStateZip input not found on the landing page');
        }
        inp.value = 'FL';
        // Trigger the oninput handler manually
        const event = new Event('input', { bubbles: true });
        inp.dispatchEvent(event);
        // Also call the explicit ysi handler if exists
        if (typeof ysi !== 'undefined' && ysi.corpsearchfilter) {
            ysi.corpsearchfilter.changeInput('FL', 'home');
        }
    }""")
    time.sleep(3)  # Wait for autocomplete

    # 3. Inspect autocomplete options
    print("[3] Inspecting autocomplete dropdown...")
    autocomplete_items = page.locator("#cityStateZipautocomplete-list li, [role='option']").all()
    print(f" {len(autocomplete_items)} autocomplete items")
    for i, item in enumerate(autocomplete_items[:10]):
        text = (item.text_content() or "").strip()[:80]
        print(f" [{i}] {text!r}")

    # 4. Click the "Florida" option, or the first option when none mentions it
    if autocomplete_items:
        for item in autocomplete_items:
            text = (item.text_content() or "").lower()
            if "florida" in text:
                print(f"[4] Clicking Florida option: {text[:60]}")
                _click_option(page, item)
                break
        else:
            # fallback: click first
            print("[4] No Florida match found; clicking first item")
            _click_option(page, autocomplete_items[0])
        time.sleep(5)  # give the click time to take effect
    else:
        print("[4] No autocomplete items; nothing to click")

    # 5. Wait for search results to populate. A page that keeps polling never
    # goes idle; that must not prevent the capture report from being printed.
    try:
        page.wait_for_load_state("networkidle", timeout=15_000)
    except PlaywrightTimeoutError:
        print("[5] Network did not go idle within 15s; continuing with what was captured")
    time.sleep(3)

    print()
    print("[5] After search — final URL:", page.url)
    print()


def _print_capture_report():
    """Print the first 30 captured requests and the first 20 captured responses."""
    banner = "=" * 60
    # Response fields that get their own labelled line below.
    labelled = ("url", "status", "content_type", "body_keys", "body_preview")

    # Show all captured API endpoints
    print(banner)
    print("CAPTURED REQUESTS")
    print(banner)
    for req in captured_requests[:30]:
        url_short = req["url"][:150]
        print(f" {req['method']} {url_short}")
        if req.get("post_data"):
            print(f" post_data: {req['post_data'][:200]}")

    print()
    print(banner)
    # The list also holds non-JSON responses, so the header no longer claims
    # "JSON only".
    print("CAPTURED RESPONSES")
    print(banner)
    for resp in captured_responses[:20]:
        url_short = resp["url"][:120]
        print(f"\n {resp['status']} {url_short}")
        print(f" ct: {resp['content_type']}")
        if resp.get("body_keys"):
            print(f" body_keys: {resp['body_keys']}")
        if resp.get("body_preview"):
            print(f" preview: {resp['body_preview']}")
        # Whatever else was recorded for this response (e.g. body_size).
        for key, value in resp.items():
            if key not in labelled:
                print(f" {key}: {value}")


with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1400, "height": 900},
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = context.new_page()
        page.on("request", on_request)
        page.on("response", on_response)
        page.set_default_timeout(30_000)

        try:
            _drive_search(page)
        finally:
            # The captured traffic is the point of the script: print it even
            # when a step above raised (the exception still propagates).
            _print_capture_report()
    finally:
        # Release the browser process on every exit path.
        browser.close()
|