feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
"""Dump the Duval Property Appraiser search page DOM to find real selectors."""
|
||||
from __future__ import annotations

import io
import sys
from pathlib import Path

# Re-wrap stdout as UTF-8 with errors="replace" so that printing scraped page
# text cannot raise an encoding error on a console with a narrower encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

from playwright.sync_api import Locator, Page, sync_playwright

USER_AGENT = "AR-House/1.0 (real estate investment analysis)"
LANDING_URL = "https://paopropertysearch.coj.net/"
# Relative path: resolved against the current working directory.
OUT_FILE = "scripts/_duval_pa_landing.html"

# JavaScript evaluated against one element; returns {attribute name: value}
# for every attribute present on that element.
_ATTRS_JS = """(el) => {
    const out = {};
    for (const a of el.attributes) out[a.name] = a.value;
    return out;
}"""


def _element_attrs(element: Locator) -> dict[str, str]:
    """Return every HTML attribute of the element matched by *element*."""
    return element.evaluate(_ATTRS_JS)


def _dump_elements(
    page: Page,
    selector: str,
    limit: int,
    *,
    text_limit: int | None = None,
) -> None:
    """Print the attributes of the first *limit* elements matching *selector*.

    When *text_limit* is given, the element's stripped text content, truncated
    to that many characters, is printed as well. A failure on one element is
    reported on its own line and does not stop the dump.
    """
    for i, element in enumerate(page.locator(selector).all()[:limit]):
        try:
            attrs = _element_attrs(element)
            if text_limit is None:
                line = f" [{i}] {attrs}"
            else:
                text = (element.text_content() or "").strip()[:text_limit]
                line = f" [{i}] {attrs} text='{text}'"
        except Exception as e:
            # Best-effort dump: report the failure instead of hiding it.
            print(f" [{i}] (couldn't get attrs: {e})")
        else:
            print(line)


def _dump_links(page: Page, limit: int) -> None:
    """Print href and text for the first *limit* ``<a>`` elements.

    Links with no href, or whose href starts with ``#``, are skipped. The
    printed index is the link's position among all ``<a>`` elements examined,
    so skipped links leave gaps in the numbering.
    """
    for i, link in enumerate(page.locator("a").all()[:limit]):
        try:
            href = link.get_attribute("href") or ""
            text = (link.text_content() or "").strip()[:80]
        except Exception as e:
            # Previously swallowed silently; report it like the other sections.
            print(f" [{i}] (couldn't read link: {e})")
            continue
        if href and not href.startswith("#"):
            print(f" [{i}] href={href} text='{text}'")


with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        page = browser.new_context(user_agent=USER_AGENT).new_page()
        page.set_default_timeout(15_000)  # milliseconds

        # Try landing page first
        print("=" * 70)
        print(f"LANDING: {LANDING_URL}")
        print("=" * 70)
        page.goto(LANDING_URL, wait_until="domcontentloaded")
        # page.url is printed after navigation so any redirect is visible.
        print("Final URL:", page.url)
        print()

        # Dump ALL inputs on the page (capped at 30)
        print("--- ALL <input> elements ---")
        _dump_elements(page, "input", 30)

        print()
        print("--- ALL <select> elements ---")
        _dump_elements(page, "select", 10)

        print()
        print("--- ALL <a> links (first 30) ---")
        _dump_links(page, 30)

        print()
        print("--- All buttons + submit inputs ---")
        _dump_elements(
            page,
            "button, input[type='submit'], input[type='button']",
            15,
            text_limit=50,
        )

        # Save full HTML for inspection
        html = page.content()
        out_path = Path(OUT_FILE)
        # Create the target directory if missing so the write cannot fail on it.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(html, encoding="utf-8")
        print(f"\nFull HTML saved to: {OUT_FILE} ({len(html):,} chars)")
    finally:
        # Close the browser even when navigation or dumping raises.
        browser.close()
|
||||
Reference in New Issue
Block a user