"""Wait longer + extract values from BCPA via specific element IDs.

Loads the BCPA Record-Search page for one folio in headless Chromium, waits
for the client-side app to fill in the ``actualAgeId`` element, prints the
text of a handful of known element IDs plus any body lines that look like
property data, and finally saves the rendered HTML and a full-page
screenshot for offline inspection.
"""

from pathlib import Path

from playwright.sync_api import sync_playwright

DEFAULT_FOLIO = "484226062150"
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
)

# Timeouts are in milliseconds, as expected by the Playwright API.
NAVIGATION_TIMEOUT_MS = 30000
POPULATE_TIMEOUT_MS = 25000
ELEMENT_TIMEOUT_MS = 2000

# Element IDs whose text content is printed individually.
ELEMENT_IDS = (
    "actualAgeId",
    "effectiveAgeId",
    "currentTaxYearMobileId",
    "lastTaxYearMobileId",
    "lastTwoTaxYearMobileId",
)

# A body line is printed when it contains any of these substrings
# (case-sensitive match).
DATA_MARKERS = ("$", "ASSESS", "OWNER", "PROPERTY", "TAX", "SALE", "DEED", "YEAR")

# JS predicate: true once the actualAgeId element exists and has non-blank text.
_POPULATED_JS = (
    "() => { const el = document.getElementById('actualAgeId'); "
    "return el && el.textContent.trim().length > 0; }"
)


def _wait_until_populated(page):
    """Block until ``actualAgeId`` has text, reporting (not raising) on failure."""
    print("Waiting for actualAgeId div to populate...")
    try:
        page.wait_for_function(_POPULATED_JS, timeout=POPULATE_TIMEOUT_MS)
    except Exception as e:
        # Deliberately best-effort: the probe continues so that whatever did
        # render is still printed and saved for diagnosis.
        print(f" TIMEOUT: {e}")
    else:
        print(" populated!")


def _print_element_values(page):
    """Print the inner text of each ID in ``ELEMENT_IDS``, or the error hit."""
    print("\nValues by element ID:")
    for elid in ELEMENT_IDS:
        try:
            txt = page.locator(f"#{elid}").inner_text(timeout=ELEMENT_TIMEOUT_MS)
        except Exception as e:
            # A missing or hidden element must not stop the remaining lookups.
            print(f" #{elid}: ERROR {e}")
        else:
            print(f" #{elid}: {txt!r}")


def _print_filtered_body(page):
    """Print non-empty body lines containing any ``DATA_MARKERS`` substring."""
    print("\n\nFull body text (filtered for non-empty data):")
    body = page.inner_text("body")
    for raw_line in body.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if any(marker in line for marker in DATA_MARKERS):
            # Truncate so a single huge line cannot flood the console.
            print(f" {line[:120]}")


def _save_artifacts(page):
    """Write the rendered HTML and a full-page screenshot under _probe_out/bcpa."""
    out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa"
    # Create the directory up front; write_text does not create parents.
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / "03_after_wait.html").write_text(page.content(), encoding="utf-8")
    page.screenshot(path=str(out_dir / "03_after_wait.png"), full_page=True)


def probe(folio=DEFAULT_FOLIO):
    """Load the BCPA record page for *folio* and dump what it renders.

    Args:
        folio: Folio identifier interpolated into the Record-Search URL.
            Defaults to the folio this probe was originally written against.

    Errors while waiting for the page to populate or while reading a single
    element are printed and do not abort the run. Errors from navigation,
    reading the body text, or saving the artifacts propagate to the caller;
    the browser is closed in either case.
    """
    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context(user_agent=USER_AGENT).new_page()
            print(f"Loading {url}")
            page.goto(
                url,
                wait_until="domcontentloaded",
                timeout=NAVIGATION_TIMEOUT_MS,
            )
            # The page is a client-side app: DOMContentLoaded fires before the
            # data is rendered, so wait explicitly for a known field to fill.
            _wait_until_populated(page)
            _print_element_values(page)
            _print_filtered_body(page)
            _save_artifacts(page)
        finally:
            # Close the browser even when a step above raised.
            browser.close()


if __name__ == "__main__":
    probe()