feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+82
View File
@@ -0,0 +1,82 @@
"""Dump Results.aspx HTML to find owner/RE# patterns."""
from __future__ import annotations
import io, sys, time
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
from playwright.sync_api import sync_playwright
# Debug helper: run one hard-coded address search against the property
# search site, then print a summary of the Results.aspx page (tables,
# keyword hits, candidate detail links) and save its full HTML to disk.
USER_AGENT = "AR-House/1.0 (real estate investment analysis)"

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    context = browser.new_context(user_agent=USER_AGENT)
    page = context.new_page()
    page.set_default_timeout(20_000)

    # Load the basic search form and wait until the network is quiet.
    page.goto(
        "https://paopropertysearch.coj.net/Basic/Search.aspx",
        wait_until="networkidle",
        timeout=20_000,
    )

    # Fill in the sample address: 3245 N PEARL ST.
    street_number = page.locator("#ctl00_cphBody_tbStreetNumber")
    street_number.fill("3245")
    street_prefix = page.locator("#ctl00_cphBody_ddStreetPrefix")
    street_prefix.select_option(value="N")
    street_name = page.locator("#ctl00_cphBody_tbStreetName")
    street_name.fill("PEARL")
    street_suffix = page.locator("#ctl00_cphBody_ddStreetSuffix")
    street_suffix.select_option(value="ST")

    # Submit through form.submit(): point the form at Results.aspx and append
    # a hidden 'ctl00$cphBody$bSearch' input first (presumably standing in for
    # the Search button's postback field -- verify against the live page).
    submit_script = """() => {
const form = document.forms[0] || document.querySelector('form');
form.action = 'Results.aspx';
let hidden = document.createElement('input');
hidden.type = 'hidden';
hidden.name = 'ctl00$cphBody$bSearch';
hidden.value = 'Search';
form.appendChild(hidden);
form.submit();
}"""
    page.evaluate(submit_script)
    page.wait_for_url("**Results.aspx**", timeout=10_000)
    page.wait_for_load_state("networkidle", timeout=10_000)

    print("URL:", page.url)
    print("Title:", page.title())
    print()

    # Look for tables and preview the first five rows of each one.
    tables = page.locator("table").all()
    print(f"Tables: {len(tables)}")
    for table_no, table in enumerate(tables):
        try:
            table_rows = table.locator("tr").all()
            print(f"\n--- Table [{table_no}] ({len(table_rows)} rows) ---")
            for row_no, row in enumerate(table_rows[:5]):
                # Each cell's text is stripped and cut to 60 characters.
                cell_texts = [
                    (cell.text_content() or "").strip()[:60]
                    for cell in row.locator("td, th").all()
                ]
                print(f" Row {row_no}: {cell_texts}")
        except Exception as exc:
            # Best effort: report the failure and continue with the next table.
            print(f" Error: {exc}")

    # Dump full HTML for parsing.
    dump_path = "scripts/_duval_pa_results.html"
    html = page.content()
    with open(dump_path, "w", encoding="utf-8") as out:
        out.write(html)
    print(f"\nFull HTML: {dump_path} ({len(html):,} chars)")

    # Report the first occurrence of each label of interest in the visible
    # body text, with 30 characters of context before and 100 from its start.
    print("\n--- Text matches for 'Owner', 'RE Number', 'Year Built' ---")
    body_text = page.locator("body").inner_text()
    keywords = (
        "Owner",
        "RE Number",
        "RE #",
        "Year Built",
        "Just Value",
        "Assessed Value",
        "Last Sale",
    )
    for kw in keywords:
        pos = body_text.find(kw)
        if pos == -1:
            print(f" '{kw}': NOT FOUND")
            continue
        snippet = body_text[max(0, pos - 30):pos + 100]
        print(f" '{kw}' at pos {pos}: ...{snippet}...")

    # Look for <a> elements that point at individual property pages.
    links = page.locator("a").all()
    print(f"\n--- Links ({len(links)} total) ---")
    href_markers = ("Property", "RE=", "RealEstate", "Detail")
    # Only the first 52 links (indices 0-51) are inspected.
    for link_no, link in enumerate(links[:52]):
        href = link.get_attribute("href") or ""
        if any(marker in href for marker in href_markers):
            label = (link.text_content() or "").strip()[:50]
            print(f" [{link_no}] href={href} text='{label}'")

    browser.close()