83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
"""Dump the Duval Property Appraiser search page DOM to find real selectors."""
|
|
from __future__ import annotations
|
|
import io, sys
|
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
USER_AGENT = "AR-House/1.0 (real estate investment analysis)"
|
|
|
|
with sync_playwright() as p:
|
|
browser = p.chromium.launch(headless=True)
|
|
page = browser.new_context(user_agent=USER_AGENT).new_page()
|
|
page.set_default_timeout(15_000)
|
|
|
|
# Try landing page first
|
|
print("=" * 70)
|
|
print("LANDING: https://paopropertysearch.coj.net/")
|
|
print("=" * 70)
|
|
page.goto("https://paopropertysearch.coj.net/", wait_until="domcontentloaded")
|
|
print("Final URL:", page.url)
|
|
print()
|
|
|
|
# Dump ALL inputs on the page
|
|
print("--- ALL <input> elements ---")
|
|
inputs = page.locator("input").all()
|
|
for i, inp in enumerate(inputs[:30]):
|
|
try:
|
|
attrs = page.evaluate("""(el) => {
|
|
const out = {};
|
|
for (const a of el.attributes) out[a.name] = a.value;
|
|
return out;
|
|
}""", inp.element_handle())
|
|
print(f" [{i}] {attrs}")
|
|
except Exception as e:
|
|
print(f" [{i}] (couldn't get attrs: {e})")
|
|
|
|
print()
|
|
print("--- ALL <select> elements ---")
|
|
for i, sel in enumerate(page.locator("select").all()[:10]):
|
|
try:
|
|
attrs = page.evaluate("""(el) => {
|
|
const out = {};
|
|
for (const a of el.attributes) out[a.name] = a.value;
|
|
return out;
|
|
}""", sel.element_handle())
|
|
print(f" [{i}] {attrs}")
|
|
except Exception as e:
|
|
print(f" [{i}] (couldn't get attrs: {e})")
|
|
|
|
print()
|
|
print("--- ALL <a> links (first 30) ---")
|
|
for i, link in enumerate(page.locator("a").all()[:30]):
|
|
try:
|
|
href = link.get_attribute("href") or ""
|
|
text = (link.text_content() or "").strip()[:80]
|
|
if href and not href.startswith("#"):
|
|
print(f" [{i}] href={href} text='{text}'")
|
|
except Exception:
|
|
pass
|
|
|
|
print()
|
|
print("--- All buttons + submit inputs ---")
|
|
for i, btn in enumerate(page.locator("button, input[type='submit'], input[type='button']").all()[:15]):
|
|
try:
|
|
attrs = page.evaluate("""(el) => {
|
|
const out = {};
|
|
for (const a of el.attributes) out[a.name] = a.value;
|
|
return out;
|
|
}""", btn.element_handle())
|
|
text = (btn.text_content() or "").strip()[:50]
|
|
print(f" [{i}] {attrs} text='{text}'")
|
|
except Exception:
|
|
pass
|
|
|
|
# Save full HTML for inspection
|
|
html = page.content()
|
|
out_file = "scripts/_duval_pa_landing.html"
|
|
with open(out_file, "w", encoding="utf-8") as f:
|
|
f.write(html)
|
|
print(f"\nFull HTML saved to: {out_file} ({len(html):,} chars)")
|
|
|
|
browser.close()
|