"""Explore or.duvalclerk.com structure for lis pendens search.""" from __future__ import annotations import io, sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright USER_AGENT = "AR-House/1.0 (real estate investment analysis)" with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_context(user_agent=USER_AGENT).new_page() page.set_default_timeout(20_000) print("Loading or.duvalclerk.com ...") page.goto("https://or.duvalclerk.com/", wait_until="networkidle", timeout=20_000) print("URL:", page.url, "Title:", page.title()) print() # Look for all links print("--- ALL LINKS ---") for i, link in enumerate(page.locator("a").all()[:40]): href = link.get_attribute("href") or "" text = (link.text_content() or "").strip()[:80] if href and not href.startswith("javascript:") and not href.startswith("mailto:"): print(f" [{i}] href={href} text='{text}'") print() print("--- ALL BUTTONS ---") for i, btn in enumerate(page.locator("button, input[type='button'], input[type='submit']").all()[:15]): attrs = page.evaluate("""(el) => { const out = {}; for (const a of el.attributes) out[a.name] = a.value; return out; }""", btn.element_handle()) text = (btn.text_content() or "").strip()[:50] print(f" [{i}] {attrs} text='{text}'") # Save HTML with open("scripts/_duval_or_landing.html", "w", encoding="utf-8") as f: f.write(page.content()) print(f"\nFull HTML saved: scripts/_duval_or_landing.html") # Look for forms print() print("--- FORMS ---") forms = page.locator("form").all() print(f" {len(forms)} forms") for i, f in enumerate(forms[:3]): action = f.get_attribute("action") or "" method = f.get_attribute("method") or "" print(f" [{i}] action={action} method={method}") # Look for inputs print() print("--- INPUTS (first 20) ---") for i, inp in enumerate(page.locator("input").all()[:20]): attrs = page.evaluate("""(el) => { const out = {}; for (const a of el.attributes) out[a.name] = a.value; return out; }""", inp.element_handle()) print(f" [{i}] {attrs}") # Check if there's a disclaimer that needs accepting print() print("--- Text content (first 1000 chars) ---") body_text = page.locator("body").inner_text()[:1000] print(body_text) browser.close()