72 lines
2.6 KiB
Python
72 lines
2.6 KiB
Python
"""Explore or.duvalclerk.com structure for lis pendens search."""
|
|
from __future__ import annotations
|
|
import io, sys
|
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
USER_AGENT = "AR-House/1.0 (real estate investment analysis)"
|
|
|
|
with sync_playwright() as p:
|
|
browser = p.chromium.launch(headless=True)
|
|
page = browser.new_context(user_agent=USER_AGENT).new_page()
|
|
page.set_default_timeout(20_000)
|
|
|
|
print("Loading or.duvalclerk.com ...")
|
|
page.goto("https://or.duvalclerk.com/", wait_until="networkidle", timeout=20_000)
|
|
print("URL:", page.url, "Title:", page.title())
|
|
print()
|
|
|
|
# Look for all links
|
|
print("--- ALL LINKS ---")
|
|
for i, link in enumerate(page.locator("a").all()[:40]):
|
|
href = link.get_attribute("href") or ""
|
|
text = (link.text_content() or "").strip()[:80]
|
|
if href and not href.startswith("javascript:") and not href.startswith("mailto:"):
|
|
print(f" [{i}] href={href} text='{text}'")
|
|
|
|
print()
|
|
print("--- ALL BUTTONS ---")
|
|
for i, btn in enumerate(page.locator("button, input[type='button'], input[type='submit']").all()[:15]):
|
|
attrs = page.evaluate("""(el) => {
|
|
const out = {};
|
|
for (const a of el.attributes) out[a.name] = a.value;
|
|
return out;
|
|
}""", btn.element_handle())
|
|
text = (btn.text_content() or "").strip()[:50]
|
|
print(f" [{i}] {attrs} text='{text}'")
|
|
|
|
# Save HTML
|
|
with open("scripts/_duval_or_landing.html", "w", encoding="utf-8") as f:
|
|
f.write(page.content())
|
|
print(f"\nFull HTML saved: scripts/_duval_or_landing.html")
|
|
|
|
# Look for forms
|
|
print()
|
|
print("--- FORMS ---")
|
|
forms = page.locator("form").all()
|
|
print(f" {len(forms)} forms")
|
|
for i, f in enumerate(forms[:3]):
|
|
action = f.get_attribute("action") or ""
|
|
method = f.get_attribute("method") or ""
|
|
print(f" [{i}] action={action} method={method}")
|
|
|
|
# Look for inputs
|
|
print()
|
|
print("--- INPUTS (first 20) ---")
|
|
for i, inp in enumerate(page.locator("input").all()[:20]):
|
|
attrs = page.evaluate("""(el) => {
|
|
const out = {};
|
|
for (const a of el.attributes) out[a.name] = a.value;
|
|
return out;
|
|
}""", inp.element_handle())
|
|
print(f" [{i}] {attrs}")
|
|
|
|
# Check if there's a disclaimer that needs accepting
|
|
print()
|
|
print("--- Text content (first 1000 chars) ---")
|
|
body_text = page.locator("body").inner_text()[:1000]
|
|
print(body_text)
|
|
|
|
browser.close()
|