"""Probe Miami-Dade PA detail page — fetch real folio and map fields.""" from pathlib import Path import time def probe(): from playwright.sync_api import sync_playwright out_dir = Path(__file__).parent.parent / "_probe_out" / "mdpa" folio = "31-2202-034-2470" # 19201 COLLINS AVE — real deal with sync_playwright() as p: browser = p.chromium.launch(headless=True) ctx = browser.new_context( user_agent="Mozilla/5.0 Chrome/131", ) page = ctx.new_page() # Land then go to detail directly if URL is parameterized # Try direct deep link via folio # MIA PA accepts folio in URL: /PropertySearch/#/?folio=XXXXXX (likely) folio_clean = folio.replace("-", "") deep_urls = [ f"https://apps.miamidadepa.gov/PropertySearch/#/?folio={folio_clean}", f"https://apps.miamidadepa.gov/PropertySearch/#/details?folio={folio_clean}", f"https://apps.miamidadepa.gov/PropertySearch/#/property/{folio_clean}", ] for url in deep_urls: print(f"[Try] {url}") page.goto(url, wait_until="domcontentloaded", timeout=30000) time.sleep(8) print(f" URL after load: {page.url}") print(f" Title: {page.title()}") body = page.inner_text("body")[:300] print(f" Body: {body[:200].encode('ascii','replace').decode('ascii')}") # If we see property details, stop if any(kw in body.lower() for kw in ("owner", "folio:", "year built")): print(" HIT - detail page!") break time.sleep(2) # If deep link didn't work, do search via form body = page.inner_text("body") if "owner" not in body.lower() or "year built" not in body.lower(): print("\n[Fallback] Doing form search via Folio tab...") page.goto("https://apps.miamidadepa.gov/PropertySearch/", wait_until="domcontentloaded") time.sleep(6) # Click Folio tab print(" Clicking Folio tab...") folio_tab = page.locator("li[id^='k-tabstrip-tab']:has-text('Folio')").first folio_tab.click() time.sleep(2) # Fill folio input print(f" Filling folio {folio_clean}...") folio_input = page.locator("kendo-textbox[formcontrolname='folio'] input").first if folio_input.count() == 0: # Try alternate selector folio_input = page.locator("input.k-input-inner").nth(0) folio_input.fill(folio_clean) time.sleep(1) # Click search button search_btn = page.locator("button[aria-label='Search button']").first search_btn.click() time.sleep(8) print(f" URL after search: {page.url}") (out_dir / "02_detail.html").write_text(page.content(), encoding="utf-8") page.screenshot(path=str(out_dir / "02_detail.png"), full_page=True) # Dump all element IDs with text print("\n[Dumping populated elements...]") elements = page.evaluate(""" () => { const out = []; const all = document.querySelectorAll('[id], [class*="owner"], [class*="folio"], [class*="year"], [class*="value"]'); for (const el of all) { const txt = (el.textContent || '').trim(); if (txt && txt.length < 200 && el.children.length < 4) { out.push({ id: el.id || '(no id)', cls: (el.className || '').substring(0, 60), text: txt.substring(0, 150), }); } } // Dedupe by (id, text) const seen = new Set(); return out.filter(e => { const k = e.id + '|' + e.text; if (seen.has(k)) return false; seen.add(k); return true; }); } """) # Print only elements with values that look meaningful keywords = ("owner", "folio", "year", "built", "address", "value", "tax", "sale", "deed", "bed", "bath", "sqft", "lot", "use", "subdivision", "zoning", "homestead", "assessed", "market") for e in elements[:300]: txt_lower = e["text"].lower() cls_lower = e["cls"].lower() id_lower = e["id"].lower() if any(k in txt_lower or k in cls_lower or k in id_lower for k in keywords): safe = e["text"][:120].encode("ascii", "replace").decode("ascii") print(f" {e['id'][:40]:40s} cls={e['cls'][:40]:40s} = {safe!r}") browser.close() if __name__ == "__main__": probe()