83 lines
3.2 KiB
Python
83 lines
3.2 KiB
Python
"""Dump Results.aspx HTML to find owner/RE# patterns."""
|
|
from __future__ import annotations
|
|
import io, sys, time
|
|
# Re-wrap stdout as UTF-8 with errors="replace" so that printing scraped page
# text never raises on characters the console's default encoding cannot
# represent; such characters are substituted instead.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# User-Agent string sent with every request made by the browser context.
USER_AGENT = "AR-House/1.0 (real estate investment analysis)"

# Where the raw Results.aspx HTML is written (relative to the current
# working directory).
OUTPUT_PATH = "scripts/_duval_pa_results.html"

# Debug probe: submit one hard-coded address search on the property search
# site, then print the shape of the results page (tables, keyword hits,
# candidate detail links) and save the full HTML for offline inspection.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(user_agent=USER_AGENT)
        page = context.new_page()
        page.set_default_timeout(20_000)

        page.goto(
            "https://paopropertysearch.coj.net/Basic/Search.aspx",
            wait_until="networkidle",
            timeout=20_000,
        )

        # Fill in the address search form: 3245 N PEARL ST.
        page.locator("#ctl00_cphBody_tbStreetNumber").fill("3245")
        page.locator("#ctl00_cphBody_ddStreetPrefix").select_option(value="N")
        page.locator("#ctl00_cphBody_tbStreetName").fill("PEARL")
        page.locator("#ctl00_cphBody_ddStreetSuffix").select_option(value="ST")

        # Submit via form.submit() instead of clicking the button. Because
        # form.submit() does not send any submit button's name/value, the
        # search button's field is added by hand as a hidden input
        # (presumably the server keys off it to run the search).
        page.evaluate("""() => {
            const form = document.forms[0] || document.querySelector('form');
            if (!form) {
                throw new Error('search form not found on page');
            }
            form.action = 'Results.aspx';
            let hidden = document.createElement('input');
            hidden.type = 'hidden';
            hidden.name = 'ctl00$cphBody$bSearch';
            hidden.value = 'Search';
            form.appendChild(hidden);
            form.submit();
        }""")
        page.wait_for_url("**Results.aspx**", timeout=10_000)
        page.wait_for_load_state("networkidle", timeout=10_000)

        print("URL:", page.url)
        print("Title:", page.title())
        print()

        # Look for tables: print the first five rows of each, with every
        # cell's text truncated to 60 characters.
        tables = page.locator("table").all()
        print(f"Tables: {len(tables)}")
        for i, table in enumerate(tables):
            # Best-effort on purpose: one unreadable table should not abort
            # the rest of the dump, so the error is reported and we move on.
            try:
                rows = table.locator("tr").all()
                print(f"\n--- Table [{i}] ({len(rows)} rows) ---")
                for j, row in enumerate(rows[:5]):
                    cells = row.locator("td, th").all()
                    texts = [(c.text_content() or "").strip()[:60] for c in cells]
                    print(f" Row {j}: {texts}")
            except Exception as e:
                print(f" Error: {e}")

        # Dump full HTML for parsing.
        html = page.content()
        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            f.write(html)
        print(f"\nFull HTML: {OUTPUT_PATH} ({len(html):,} chars)")

        # Report the first occurrence of each label of interest in the
        # visible page text, with 30 characters of leading and 100 characters
        # of trailing context (the trailing span starts at the match itself).
        print("\n--- Text matches for 'Owner', 'RE Number', 'Year Built' ---")
        body_text = page.locator("body").inner_text()
        keywords = [
            "Owner",
            "RE Number",
            "RE #",
            "Year Built",
            "Just Value",
            "Assessed Value",
            "Last Sale",
        ]
        for kw in keywords:
            idx = body_text.find(kw)
            if idx == -1:
                print(f" '{kw}': NOT FOUND")
            else:
                snippet = body_text[max(0, idx - 30):idx + 100]
                print(f" '{kw}' at pos {idx}: ...{snippet}...")

        # Look for <a> elements that point at individual property pages.
        links = page.locator("a").all()
        print(f"\n--- Links ({len(links)} total) ---")
        href_markers = ("Property", "RE=", "RealEstate", "Detail")
        for i, link in enumerate(links):
            href = link.get_attribute("href") or ""
            if any(marker in href for marker in href_markers):
                text = (link.text_content() or "").strip()[:50]
                print(f" [{i}] href={href} text='{text}'")
            # Only inspect links up to index 51 so a link-heavy page does
            # not flood the output.
            if i > 50:
                break
    finally:
        # Close the browser even when a navigation or locator step fails.
        browser.close()
|