feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+117
View File
@@ -0,0 +1,117 @@
"""Probe Miami-Dade PA detail page — fetch real folio and map fields."""
from pathlib import Path
import time
def probe():
    """Open the Miami-Dade PA property search for one hard-coded folio and dump it.

    Steps, in order:
      1. Try three candidate deep-link URL shapes built from the folio number.
      2. If the page body does not contain both "owner" and "year built",
         fall back to driving the search form through its Folio tab.
      3. Save the resulting page HTML and a full-page screenshot under
         ``<parent of this file's directory>/_probe_out/mdpa``.
      4. Print elements whose id, class or text mention a property-related
         keyword, so that fields can be mapped to selectors by hand.

    Takes no arguments and returns ``None``; all output goes to stdout and to
    the two files written in step 3. Errors raised while driving the browser
    are not caught here; the browser is closed before they propagate.
    """
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "mdpa"
    # write_text() below does not create missing parent directories.
    out_dir.mkdir(parents=True, exist_ok=True)
    folio = "31-2202-034-2470"  # 19201 COLLINS AVE — real deal
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 Chrome/131",
            )
            page = ctx.new_page()
            # Land then go to detail directly if URL is parameterized.
            # Try direct deep link via folio.
            # MIA PA accepts folio in URL: /PropertySearch/#/?folio=XXXXXX (likely)
            folio_clean = folio.replace("-", "")
            deep_urls = [
                f"https://apps.miamidadepa.gov/PropertySearch/#/?folio={folio_clean}",
                f"https://apps.miamidadepa.gov/PropertySearch/#/details?folio={folio_clean}",
                f"https://apps.miamidadepa.gov/PropertySearch/#/property/{folio_clean}",
            ]
            for url in deep_urls:
                print(f"[Try] {url}")
                page.goto(url, wait_until="domcontentloaded", timeout=30000)
                # Fixed pause after DOMContentLoaded before reading the page.
                time.sleep(8)
                print(f" URL after load: {page.url}")
                print(f" Title: {page.title()}")
                # NOTE(review): only the first 300 characters of the body are
                # inspected here, while the fallback check after the loop
                # looks at the whole body — confirm this is intended.
                body = page.inner_text("body")[:300]
                # ASCII-escape so printing cannot fail on a narrow console encoding.
                print(f" Body: {body[:200].encode('ascii','replace').decode('ascii')}")
                # If we see property details, stop.
                if any(kw in body.lower() for kw in ("owner", "folio:", "year built")):
                    print(" HIT - detail page!")
                    break
                time.sleep(2)

            # If deep link didn't work, do search via form.
            body = page.inner_text("body")
            if "owner" not in body.lower() or "year built" not in body.lower():
                print("\n[Fallback] Doing form search via Folio tab...")
                page.goto(
                    "https://apps.miamidadepa.gov/PropertySearch/",
                    wait_until="domcontentloaded",
                )
                time.sleep(6)
                # Click Folio tab.
                print(" Clicking Folio tab...")
                folio_tab = page.locator("li[id^='k-tabstrip-tab']:has-text('Folio')").first
                folio_tab.click()
                time.sleep(2)
                # Fill folio input.
                print(f" Filling folio {folio_clean}...")
                folio_input = page.locator("kendo-textbox[formcontrolname='folio'] input").first
                if folio_input.count() == 0:
                    # Try alternate selector.
                    folio_input = page.locator("input.k-input-inner").nth(0)
                folio_input.fill(folio_clean)
                time.sleep(1)
                # Click search button.
                search_btn = page.locator("button[aria-label='Search button']").first
                search_btn.click()
                time.sleep(8)
                print(f" URL after search: {page.url}")

            (out_dir / "02_detail.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "02_detail.png"), full_page=True)

            # Dump all element IDs with text.
            print("\n[Dumping populated elements...]")
            # The class is read with getAttribute('class') rather than
            # el.className: for SVG elements className is an SVGAnimatedString,
            # which has no substring() and would make the whole evaluate throw.
            elements = page.evaluate("""
                () => {
                    const out = [];
                    const all = document.querySelectorAll('[id], [class*="owner"], [class*="folio"], [class*="year"], [class*="value"]');
                    for (const el of all) {
                        const txt = (el.textContent || '').trim();
                        if (txt && txt.length < 200 && el.children.length < 4) {
                            out.push({
                                id: el.id || '(no id)',
                                cls: (el.getAttribute('class') || '').substring(0, 60),
                                text: txt.substring(0, 150),
                            });
                        }
                    }
                    // Dedupe by (id, text)
                    const seen = new Set();
                    return out.filter(e => {
                        const k = e.id + '|' + e.text;
                        if (seen.has(k)) return false;
                        seen.add(k);
                        return true;
                    });
                }
            """)
            # Print only elements with values that look meaningful.
            keywords = ("owner", "folio", "year", "built", "address", "value", "tax",
                        "sale", "deed", "bed", "bath", "sqft", "lot", "use", "subdivision",
                        "zoning", "homestead", "assessed", "market")
            for e in elements[:300]:
                txt_lower = e["text"].lower()
                cls_lower = e["cls"].lower()
                id_lower = e["id"].lower()
                if any(k in txt_lower or k in cls_lower or k in id_lower for k in keywords):
                    safe = e["text"][:120].encode("ascii", "replace").decode("ascii")
                    print(f" {e['id'][:40]:40s} cls={e['cls'][:40]:40s} = {safe!r}")
        finally:
            # Close the browser even when a navigation or selector step raises.
            browser.close()
# Script entry point: run the probe only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    probe()