Files
AR-House/scripts/probe_bcpa_values.py
T
2026-07-03 12:24:58 -04:00

64 lines
2.3 KiB
Python

"""Wait longer + extract values from BCPA via specific element IDs."""
from playwright.sync_api import sync_playwright
def probe(folio: str = "484226062150") -> None:
    """Load a BCPA Record-Search page and dump the values it renders.

    The page is rendered client-side, so after navigation the function waits
    until the ``actualAgeId`` element contains text before reading anything.
    It then prints the text of a fixed list of element IDs, prints every body
    line containing ``$`` or one of a few upper-case keywords, and finally
    saves the rendered HTML plus a full-page screenshot under
    ``<parent of this script's directory>/_probe_out/bcpa``.

    Args:
        folio: Folio number placed in the ``folio`` query parameter of the
            Record-Search URL. Defaults to the folio the probe was
            originally written against.

    Raises:
        Exception: Navigation and artifact-writing errors propagate. Timeouts
            while waiting for, or reading, individual elements are printed
            and do not abort the probe.
    """
    from pathlib import Path

    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
            ).new_page()
            print(f"Loading {url}")
            page.goto(url, wait_until="domcontentloaded", timeout=30000)

            # DOMContentLoaded fires before the SPA has filled in its data, so
            # poll until the actualAgeId element actually has text.
            print("Waiting for actualAgeId div to populate...")
            try:
                page.wait_for_function(
                    "() => { const el = document.getElementById('actualAgeId'); return el && el.textContent.trim().length > 0; }",
                    timeout=25000,
                )
                print(" populated!")
            except Exception as e:
                # Best-effort probe: report and keep going so the HTML and
                # screenshot are still captured for inspection.
                print(f" TIMEOUT: {e}")

            # Extract values from known IDs.
            ids = [
                "actualAgeId", "effectiveAgeId", "currentTaxYearMobileId",
                "lastTaxYearMobileId", "lastTwoTaxYearMobileId",
            ]
            print("\nValues by element ID:")
            for elid in ids:
                try:
                    txt = page.locator(f"#{elid}").inner_text(timeout=2000)
                    print(f" #{elid}: {txt!r}")
                except Exception as e:
                    print(f" #{elid}: ERROR {e}")

            # Dump the interesting lines of the whole page text.
            print("\n\nFull body text (filtered for non-empty data):")
            body = page.inner_text("body")
            markers = ("$", "ASSESS", "OWNER", "PROPERTY", "TAX", "SALE", "DEED", "YEAR")
            for raw_line in body.split("\n"):
                text = raw_line.strip()
                if not text:
                    continue
                if any(marker in text for marker in markers):
                    # Truncate so one very long line cannot flood the output.
                    print(f" {text[:120]}")

            # Save the rendered HTML and a screenshot. The output directory is
            # created first; otherwise write_text fails on a fresh checkout.
            out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa"
            out_dir.mkdir(parents=True, exist_ok=True)
            (out_dir / "03_after_wait.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "03_after_wait.png"), full_page=True)
        finally:
            # Close the browser even when navigation or saving raised.
            browser.close()
# Script entry point: run the probe only when executed directly, not on import.
if __name__ == "__main__":
    probe()