feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
"""Dump Results.aspx HTML to find owner/RE# patterns."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
# Re-wrap stdout as UTF-8 with errors="replace" so that characters the stream
# cannot encode are substituted instead of raising while page text is printed.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# User-Agent header sent with every request made by the browser context.
USER_AGENT = "AR-House/1.0 (real estate investment analysis)"

# Drive a headless Chromium through the paopropertysearch.coj.net basic search
# form for one hard-coded address, print a summary of the Results page, and
# save that page's HTML to disk for offline inspection.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=True)
    context = browser.new_context(user_agent=USER_AGENT)
    page = context.new_page()
    page.set_default_timeout(20_000)

    page.goto(
        "https://paopropertysearch.coj.net/Basic/Search.aspx",
        wait_until="networkidle",
        timeout=20_000,
    )

    # Fill in the address: street number, prefix, name, then suffix.
    street_number = page.locator("#ctl00_cphBody_tbStreetNumber")
    street_number.fill("3245")
    street_prefix = page.locator("#ctl00_cphBody_ddStreetPrefix")
    street_prefix.select_option(value="N")
    street_name = page.locator("#ctl00_cphBody_tbStreetName")
    street_name.fill("PEARL")
    street_suffix = page.locator("#ctl00_cphBody_ddStreetSuffix")
    street_suffix.select_option(value="ST")

    # Submit via form.submit(): point the form at Results.aspx and append a
    # hidden input carrying the search button's name/value before submitting.
    submit_script = """() => {
        const form = document.forms[0] || document.querySelector('form');
        form.action = 'Results.aspx';
        let hidden = document.createElement('input');
        hidden.type = 'hidden';
        hidden.name = 'ctl00$cphBody$bSearch';
        hidden.value = 'Search';
        form.appendChild(hidden);
        form.submit();
    }"""
    page.evaluate(submit_script)
    page.wait_for_url("**Results.aspx**", timeout=10_000)
    page.wait_for_load_state("networkidle", timeout=10_000)

    print("URL:", page.url)
    print("Title:", page.title())
    print()

    # Look for tables and preview the first five rows of each one.
    tables = page.locator("table").all()
    print(f"Tables: {len(tables)}")
    for table_idx, table in enumerate(tables):
        try:
            rows = table.locator("tr").all()
            print(f"\n--- Table [{table_idx}] ({len(rows)} rows) ---")
            for row_idx, row in enumerate(rows[:5]):
                row_cells = row.locator("td, th").all()
                # Each cell's text is stripped and capped at 60 characters.
                texts = [
                    (cell.text_content() or "").strip()[:60]
                    for cell in row_cells
                ]
                print(f" Row {row_idx}: {texts}")
        except Exception as exc:
            # Best effort: report the failure and move on to the next table.
            print(f" Error: {exc}")

    # Dump full HTML for parsing.
    html = page.content()
    dump_path = "scripts/_duval_pa_results.html"
    with open(dump_path, "w", encoding="utf-8") as dump_file:
        dump_file.write(html)
    print(f"\nFull HTML: {dump_path} ({len(html):,} chars)")

    # Search the visible body text for field labels of interest and show the
    # first occurrence of each with some surrounding context.
    print("\n--- Text matches for 'Owner', 'RE Number', 'Year Built' ---")
    body_text = page.locator("body").inner_text()
    keywords = (
        "Owner",
        "RE Number",
        "RE #",
        "Year Built",
        "Just Value",
        "Assessed Value",
        "Last Sale",
    )
    for kw in keywords:
        pos = body_text.find(kw)
        if pos == -1:
            print(f" '{kw}': NOT FOUND")
            continue
        # Context window: 30 characters before the match, 100 from its start.
        snippet = body_text[max(0, pos - 30):pos + 100]
        print(f" '{kw}' at pos {pos}: ...{snippet}...")

    # Look for <a> elements that point to individual properties.
    links = page.locator("a").all()
    print(f"\n--- Links ({len(links)} total) ---")
    href_markers = ("Property", "RE=", "RealEstate", "Detail")
    for link_idx, link in enumerate(links):
        href = link.get_attribute("href") or ""
        if any(marker in href for marker in href_markers):
            label = (link.text_content() or "").strip()[:50]
            print(f" [{link_idx}] href={href} text='{label}'")
        # Stop scanning once the index passes 50.
        if link_idx > 50:
            break

    browser.close()
|
||||
Reference in New Issue
Block a user