Files
AR-House/scripts/explore_duval_official.py
T
2026-07-03 12:24:58 -04:00

72 lines
2.6 KiB
Python

"""Explore or.duvalclerk.com structure for lis pendens search."""
from __future__ import annotations
import io, sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
from playwright.sync_api import sync_playwright
from pathlib import Path

USER_AGENT = "AR-House/1.0 (real estate investment analysis)"

# Landing page whose structure is being explored.
_START_URL = "https://or.duvalclerk.com/"

# The HTML snapshot is written next to this script so the run does not depend
# on the current working directory (a CWD-relative "scripts/..." path raises
# FileNotFoundError halfway through when launched from anywhere else).
_LANDING_HTML = Path(__file__).resolve().parent / "_duval_or_landing.html"

# Link schemes that are not navigable URLs and are left out of the link dump.
_SKIPPED_SCHEMES = ("javascript:", "mailto:")

# Runs inside the page: flattens an element's attributes into a plain object.
_ATTRS_JS = """(el) => {
    const out = {};
    for (const a of el.attributes) out[a.name] = a.value;
    return out;
}"""


def _element_attrs(locator) -> dict[str, str]:
    """Return all HTML attributes of the element behind *locator* as a dict."""
    return locator.evaluate(_ATTRS_JS)


# Exploration run: load the landing page, then print its links, buttons, forms,
# inputs and the start of its visible text, and save the full HTML to disk.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        page = browser.new_context(user_agent=USER_AGENT).new_page()
        page.set_default_timeout(20_000)

        print("Loading or.duvalclerk.com ...")
        page.goto(_START_URL, wait_until="networkidle", timeout=20_000)
        print("URL:", page.url, "Title:", page.title())
        print()

        # Links: only the first 40 anchors are inspected; the printed index is
        # the anchor's position on the page, so skipped entries leave gaps.
        print("--- ALL LINKS ---")
        for i, link in enumerate(page.locator("a").all()[:40]):
            href = link.get_attribute("href") or ""
            text = (link.text_content() or "").strip()[:80]
            if href and not href.startswith(_SKIPPED_SCHEMES):
                print(f" [{i}] href={href} text='{text}'")
        print()

        # Buttons: first 15 <button> / button-like <input> elements.
        print("--- ALL BUTTONS ---")
        buttons = page.locator(
            "button, input[type='button'], input[type='submit']"
        ).all()[:15]
        for i, btn in enumerate(buttons):
            attrs = _element_attrs(btn)
            text = (btn.text_content() or "").strip()[:50]
            print(f" [{i}] {attrs} text='{text}'")

        # Save the rendered HTML for offline inspection.
        _LANDING_HTML.write_text(page.content(), encoding="utf-8")
        print(f"\nFull HTML saved: {_LANDING_HTML}")

        # Forms: report the count, then action/method of the first three.
        print()
        print("--- FORMS ---")
        forms = page.locator("form").all()
        print(f" {len(forms)} forms")
        for i, form in enumerate(forms[:3]):
            action = form.get_attribute("action") or ""
            method = form.get_attribute("method") or ""
            print(f" [{i}] action={action} method={method}")

        # Inputs: attribute dump of the first 20 <input> elements.
        print()
        print("--- INPUTS (first 20) ---")
        for i, inp in enumerate(page.locator("input").all()[:20]):
            print(f" [{i}] {_element_attrs(inp)}")

        # Visible text: printed so a reader can check whether the page shows a
        # disclaimer that has to be accepted before searching.
        print()
        print("--- Text content (first 1000 chars) ---")
        body_text = page.locator("body").inner_text()[:1000]
        print(body_text)
    finally:
        # Close the browser even when a step above raises (e.g. goto timeout).
        browser.close()