# AR-House/scripts/probe_bcpa_values.py

"""Wait longer + extract values from BCPA via specific element IDs."""
from playwright.sync_api import sync_playwright


# Substrings that mark a line of the rendered page text as worth printing.
_DATA_MARKERS = ("$", "ASSESS", "OWNER", "PROPERTY", "TAX", "SALE", "DEED", "YEAR")

# Element IDs whose text is printed one by one.
_ELEMENT_IDS = (
    "actualAgeId", "effectiveAgeId", "currentTaxYearMobileId",
    "lastTaxYearMobileId", "lastTwoTaxYearMobileId",
)


def _data_lines(body_text):
    """Yield stripped, non-empty lines of *body_text* that contain a data marker."""
    for raw in body_text.split("\n"):
        line = raw.strip()
        if line and any(marker in line for marker in _DATA_MARKERS):
            yield line


def probe(folio: str = "484226062150") -> None:
    """Load the BCPA record page for *folio* and dump what it renders.

    Opens the Record-Search URL in headless Chromium, waits up to 25 s for
    the ``actualAgeId`` element to contain text, then:

    * prints the inner text of every ID in ``_ELEMENT_IDS``;
    * prints each body line containing one of ``_DATA_MARKERS`` (truncated
      to 120 characters);
    * saves the page HTML and a full-page screenshot as
      ``03_after_wait.html`` / ``03_after_wait.png`` under
      ``<script dir>/../_probe_out/bcpa``, creating that directory if needed.

    Args:
        folio: Folio number substituted into the Record-Search URL. Defaults
            to the folio this probe was originally hard-coded to.

    Raises:
        playwright.sync_api.Error: If navigation or any step other than the
            two best-effort waits below fails. The browser is closed first.
    """
    from pathlib import Path

    from playwright.sync_api import Error as PlaywrightError
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"
    out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # try/finally so the browser is closed even when a step below raises.
        try:
            page = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
            ).new_page()

            print(f"Loading {url}")
            page.goto(url, wait_until="domcontentloaded", timeout=30000)

            # Wait for Angular SPA — explicitly wait for actualAgeId div to have text
            print("Waiting for actualAgeId div to populate...")
            try:
                page.wait_for_function(
                    "() => { const el = document.getElementById('actualAgeId'); return el && el.textContent.trim().length > 0; }",
                    timeout=25000,
                )
            except PlaywrightTimeoutError as e:
                # Best effort: keep going and dump whatever did render.
                print(f"  TIMEOUT: {e}")
            else:
                print("  populated!")

            # Extract values from known IDs
            print("\nValues by element ID:")
            for elid in _ELEMENT_IDS:
                try:
                    txt = page.locator(f"#{elid}").inner_text(timeout=2000)
                except PlaywrightError as e:
                    # A missing element should not stop the remaining IDs.
                    print(f"  #{elid}: ERROR {e}")
                else:
                    print(f"  #{elid}: {txt!r}")

            # Full inner text of the page, filtered down to data-bearing lines
            print("\n\nFull body text (filtered for non-empty data):")
            for line in _data_lines(page.inner_text("body")):
                print(f"  {line[:120]}")

            # Save updated HTML; the output directory may not exist yet.
            out_dir.mkdir(parents=True, exist_ok=True)
            (out_dir / "03_after_wait.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "03_after_wait.png"), full_page=True)
        finally:
            browser.close()


# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    probe()