118 lines
4.8 KiB
Python
118 lines
4.8 KiB
Python
"""Probe Miami-Dade PA detail page — fetch real folio and map fields."""
|
|
from pathlib import Path
|
|
import time
|
|
|
|
|
|
# Base URL of the Miami-Dade Property Appraiser search single-page app.
_SEARCH_URL = "https://apps.miamidadepa.gov/PropertySearch/"

# Substrings that mark an element as worth printing when found in its
# text, class, or id (all compared lower-cased).
_FIELD_KEYWORDS = (
    "owner", "folio", "year", "built", "address", "value", "tax",
    "sale", "deed", "bed", "bath", "sqft", "lot", "use", "subdivision",
    "zoning", "homestead", "assessed", "market",
)

# JavaScript evaluated in the page: collects small, text-bearing elements
# that have an id or a class hinting at a property field, then de-duplicates
# them by (id, text).
_DUMP_ELEMENTS_JS = """
() => {
    const out = [];
    const all = document.querySelectorAll('[id], [class*="owner"], [class*="folio"], [class*="year"], [class*="value"]');
    for (const el of all) {
        const txt = (el.textContent || '').trim();
        if (txt && txt.length < 200 && el.children.length < 4) {
            // On SVG elements className is an SVGAnimatedString object, not a
            // string, so calling .substring on it would throw. Fall back to
            // the raw class attribute in that case.
            const rawCls = el.className;
            const cls = typeof rawCls === 'string' ? rawCls : (el.getAttribute('class') || '');
            out.push({
                id: el.id || '(no id)',
                cls: cls.substring(0, 60),
                text: txt.substring(0, 150),
            });
        }
    }
    // Dedupe by (id, text)
    const seen = new Set();
    return out.filter(e => {
        const k = e.id + '|' + e.text;
        if (seen.has(k)) return false;
        seen.add(k);
        return true;
    });
}
"""


def _ascii_safe(text):
    """Return *text* with non-ASCII characters replaced so it prints on any console."""
    return text.encode("ascii", "replace").decode("ascii")


def _try_deep_links(page, folio_clean):
    """Visit each guessed deep-link URL for the folio, stopping at the first that looks like a detail page."""
    from playwright.sync_api import Error as PlaywrightError

    # The exact deep-link format is a guess; these are the candidate shapes.
    deep_urls = [
        f"{_SEARCH_URL}#/?folio={folio_clean}",
        f"{_SEARCH_URL}#/details?folio={folio_clean}",
        f"{_SEARCH_URL}#/property/{folio_clean}",
    ]

    for url in deep_urls:
        print(f"[Try] {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except PlaywrightError as exc:
            # A failed guess must not abort the probe: the remaining
            # candidates and the form-search fallback can still succeed.
            print(f" Navigation failed: {_ascii_safe(str(exc))}")
            continue
        # Fixed wait to give the client-side app time to render.
        time.sleep(8)
        print(f" URL after load: {page.url}")
        print(f" Title: {page.title()}")
        body = page.inner_text("body")[:300]
        print(f" Body: {_ascii_safe(body[:200])}")
        # If we see property details, stop
        if any(kw in body.lower() for kw in ("owner", "folio:", "year built")):
            print(" HIT - detail page!")
            break
        time.sleep(2)


def _search_by_folio(page, folio_clean):
    """Open the search page, switch to the Folio tab, enter the folio and submit the form."""
    print("\n[Fallback] Doing form search via Folio tab...")
    page.goto(_SEARCH_URL, wait_until="domcontentloaded")
    time.sleep(6)

    # Click Folio tab
    print(" Clicking Folio tab...")
    folio_tab = page.locator("li[id^='k-tabstrip-tab']:has-text('Folio')").first
    folio_tab.click()
    time.sleep(2)

    # Fill folio input
    print(f" Filling folio {folio_clean}...")
    folio_input = page.locator("kendo-textbox[formcontrolname='folio'] input").first
    if folio_input.count() == 0:
        # Try alternate selector
        folio_input = page.locator("input.k-input-inner").nth(0)
    folio_input.fill(folio_clean)
    time.sleep(1)

    # Click search button
    search_btn = page.locator("button[aria-label='Search button']").first
    search_btn.click()
    time.sleep(8)
    print(f" URL after search: {page.url}")


def _print_matching_elements(elements):
    """Print the first 300 dumped elements whose text, class, or id contains a field keyword."""
    for e in elements[:300]:
        haystacks = (e["text"].lower(), e["cls"].lower(), e["id"].lower())
        if any(k in hay for k in _FIELD_KEYWORDS for hay in haystacks):
            safe = _ascii_safe(e["text"][:120])
            print(f" {e['id'][:40]:40s} cls={e['cls'][:40]:40s} = {safe!r}")


def probe():
    """Load the detail page for one hard-coded folio and dump its populated elements.

    Steps:
      1. Launch headless Chromium and try several guessed deep-link URLs.
      2. If the page text does not contain both "owner" and "year built",
         fall back to submitting the search form on the Folio tab.
      3. Save the resulting HTML and a full-page screenshot under
         ``<parent of this file's directory>/_probe_out/mdpa``.
      4. Print elements whose id, class, or text matches a field keyword.

    Returns:
        None. Results are written to disk and printed to stdout.
    """
    # Imported lazily so the module can be imported without Playwright installed.
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "mdpa"
    # Create the output directory up front; otherwise write_text/screenshot
    # fail on a fresh checkout.
    out_dir.mkdir(parents=True, exist_ok=True)

    folio = "31-2202-034-2470"  # 19201 COLLINS AVE — real deal
    folio_clean = folio.replace("-", "")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 Chrome/131",
            )
            page = ctx.new_page()

            _try_deep_links(page, folio_clean)

            # If deep link didn't work, do search via form
            body_lower = page.inner_text("body").lower()
            if "owner" not in body_lower or "year built" not in body_lower:
                _search_by_folio(page, folio_clean)

            (out_dir / "02_detail.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "02_detail.png"), full_page=True)

            # Dump all element IDs with text
            print("\n[Dumping populated elements...]")
            elements = page.evaluate(_DUMP_ELEMENTS_JS)

            # Print only elements with values that look meaningful
            _print_matching_elements(elements)
        finally:
            # Always release the browser, even when a step above raises.
            browser.close()
|
|
|
|
|
|
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    probe()
|