feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
"""Probe Miami-Dade PA detail page — fetch real folio and map fields."""
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
|
||||
def probe() -> None:
    """Open the Miami-Dade PA property page for one hard-coded folio and dump it.

    Steps, all performed in a headless Chromium session:

    1. Try three candidate deep-link URL shapes for the folio and stop at the
       first one whose visible text looks like a detail page.
    2. If the page text still lacks "owner" or "year built", fall back to the
       search form: click the Folio tab, fill the folio, click Search.
    3. Save the resulting HTML and a full-page screenshot under
       ``<parent of this file's directory>/_probe_out/mdpa``.
    4. Print id / class / text of populated elements that mention a
       property-related keyword, to help map fields to selectors.

    Returns nothing; all results go to stdout and to the output directory.
    """
    # Imported lazily so importing this module does not require Playwright.
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "mdpa"
    # write_text()/screenshot() below do not create missing parents themselves.
    out_dir.mkdir(parents=True, exist_ok=True)
    folio = "31-2202-034-2470"  # 19201 COLLINS AVE — real deal

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 Chrome/131",
            )
            page = ctx.new_page()

            # Land then go to detail directly if URL is parameterized
            # Try direct deep link via folio
            # MIA PA accepts folio in URL: /PropertySearch/#/?folio=XXXXXX (likely)
            folio_clean = folio.replace("-", "")
            deep_urls = [
                f"https://apps.miamidadepa.gov/PropertySearch/#/?folio={folio_clean}",
                f"https://apps.miamidadepa.gov/PropertySearch/#/details?folio={folio_clean}",
                f"https://apps.miamidadepa.gov/PropertySearch/#/property/{folio_clean}",
            ]

            for url in deep_urls:
                print(f"[Try] {url}")
                page.goto(url, wait_until="domcontentloaded", timeout=30000)
                # Fixed wait; presumably gives the client-side app time to render.
                time.sleep(8)
                print(f" URL after load: {page.url}")
                print(f" Title: {page.title()}")
                # Only the first 300 characters of visible text are inspected here.
                body = page.inner_text("body")[:300]
                print(f" Body: {body[:200].encode('ascii','replace').decode('ascii')}")
                # If we see property details, stop
                head_lower = body.lower()
                if any(kw in head_lower for kw in ("owner", "folio:", "year built")):
                    print(" HIT - detail page!")
                    break
                time.sleep(2)

            # If deep link didn't work, do search via form
            # (this check is stricter than the HIT check: the full page text
            # must contain both "owner" and "year built" to skip the fallback).
            body_lower = page.inner_text("body").lower()
            if "owner" not in body_lower or "year built" not in body_lower:
                print("\n[Fallback] Doing form search via Folio tab...")
                page.goto(
                    "https://apps.miamidadepa.gov/PropertySearch/",
                    wait_until="domcontentloaded",
                )
                time.sleep(6)

                # Click Folio tab
                print(" Clicking Folio tab...")
                folio_tab = page.locator("li[id^='k-tabstrip-tab']:has-text('Folio')").first
                folio_tab.click()
                time.sleep(2)

                # Fill folio input
                print(f" Filling folio {folio_clean}...")
                folio_input = page.locator("kendo-textbox[formcontrolname='folio'] input").first
                if folio_input.count() == 0:
                    # Try alternate selector
                    folio_input = page.locator("input.k-input-inner").nth(0)
                folio_input.fill(folio_clean)
                time.sleep(1)

                # Click search button
                search_btn = page.locator("button[aria-label='Search button']").first
                search_btn.click()
                time.sleep(8)
                print(f" URL after search: {page.url}")

            (out_dir / "02_detail.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "02_detail.png"), full_page=True)

            # Dump all element IDs with text
            print("\n[Dumping populated elements...]")
            elements = page.evaluate("""
            () => {
                const out = [];
                const all = document.querySelectorAll('[id], [class*="owner"], [class*="folio"], [class*="year"], [class*="value"]');
                for (const el of all) {
                    const txt = (el.textContent || '').trim();
                    if (txt && txt.length < 200 && el.children.length < 4) {
                        // el.className is an SVGAnimatedString (no .substring)
                        // on SVG elements, so read the attribute instead.
                        const cls = el.getAttribute('class') || '';
                        out.push({
                            id: el.id || '(no id)',
                            cls: cls.substring(0, 60),
                            text: txt.substring(0, 150),
                        });
                    }
                }
                // Dedupe by (id, text)
                const seen = new Set();
                return out.filter(e => {
                    const k = e.id + '|' + e.text;
                    if (seen.has(k)) return false;
                    seen.add(k);
                    return true;
                });
            }
            """)

            # Print only elements with values that look meaningful
            keywords = ("owner", "folio", "year", "built", "address", "value", "tax",
                        "sale", "deed", "bed", "bath", "sqft", "lot", "use", "subdivision",
                        "zoning", "homestead", "assessed", "market")
            for e in elements[:300]:
                txt_lower = e["text"].lower()
                cls_lower = e["cls"].lower()
                id_lower = e["id"].lower()
                if any(k in txt_lower or k in cls_lower or k in id_lower for k in keywords):
                    # Force ASCII so printing cannot fail on a narrow console encoding.
                    safe = e["text"][:120].encode("ascii", "replace").decode("ascii")
                    print(f" {e['id'][:40]:40s} cls={e['cls'][:40]:40s} = {safe!r}")
        finally:
            # Close the browser even when a navigation or selector step raises.
            browser.close()
|
||||
|
||||
|
||||
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    probe()
|
||||
Reference in New Issue
Block a user