"""Probe Orange County PA — find right deep link + data structure.""" from pathlib import Path import time def probe(): from playwright.sync_api import sync_playwright out_dir = Path(__file__).parent.parent / "_probe_out" / "ocpa" out_dir.mkdir(parents=True, exist_ok=True) parcel = "292433310901080" # 3142 TIMUCUA CIR with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_context(user_agent="Mozilla/5.0 Chrome/131").new_page() # Capture XHR responses = [] page.on("response", lambda r: responses.append( {"url": r.url, "status": r.status, "ct": r.headers.get("content-type", "")[:30]} ) if r.status >= 200 and r.status < 400 else None) urls = [ f"https://ocpaweb.ocpafl.org/parcelsearch?parcel={parcel}", f"https://ocpaweb.ocpafl.org/Parcel/{parcel}", f"https://www.ocpafl.org/searches/parcelsearch.aspx?parcel={parcel}", ] for url in urls: page.goto(url, wait_until="domcontentloaded", timeout=20000) time.sleep(8) body = page.inner_text("body")[:500] print(f"\n[{url}]") print(f" URL after: {page.url}") print(f" Body snippet: {body[:300].encode('ascii','replace').decode('ascii')}") # Check if data is present if "owner" in body.lower() and any(w in body.lower() for w in ("year built", "just value", "sale")): print(" DATA FOUND!") (out_dir / "01_detail.html").write_text(page.content(), encoding="utf-8") break # Print relevant API responses print(f"\nRelevant XHR responses ({len(responses)}):") for r in responses[-15:]: if "ocpa" in r["url"].lower() and ("json" in r["ct"] or "Parcel" in r["url"] or "Property" in r["url"]): print(f" {r['status']} [{r['ct']}] {r['url'][:140]}") browser.close() if __name__ == "__main__": probe()