# File-viewer header captured with the source: 64 lines, 2.3 KiB, Python.
"""Wait longer + extract values from BCPA via specific element IDs."""
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
|
|
def probe(folio: str = "484226062150") -> None:
    """Load a BCPA record page headlessly and dump what the SPA rendered.

    Steps, in order:

    1. Open the Record-Search URL for ``folio`` in headless Chromium.
    2. Wait (up to 25 s) for the ``#actualAgeId`` element to contain text,
       as a signal that the Angular app has filled in the record.
    3. Print the inner text of a fixed list of element IDs.
    4. Print every non-empty body line that contains ``$`` or one of a few
       upper-case marker words, truncated to 120 characters.
    5. Save the page HTML and a full-page screenshot under
       ``_probe_out/bcpa`` two directory levels above this file.

    Args:
        folio: Folio identifier placed in the ``folio`` query parameter.
            Defaults to the value this probe was originally written for.
    """
    from pathlib import Path

    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"
    element_ids = [
        "actualAgeId", "effectiveAgeId", "currentTaxYearMobileId",
        "lastTaxYearMobileId", "lastTwoTaxYearMobileId",
    ]
    # Case-sensitive substrings that mark a body line as worth printing.
    markers = ("$", "ASSESS", "OWNER", "PROPERTY", "TAX", "SALE", "DEED", "YEAR")
    out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
            ).new_page()

            print(f"Loading {url}")
            page.goto(url, wait_until="domcontentloaded", timeout=30000)

            # Wait for Angular SPA — explicitly wait for actualAgeId div to have text
            print("Waiting for actualAgeId div to populate...")
            try:
                page.wait_for_function(
                    "() => { const el = document.getElementById('actualAgeId'); return el && el.textContent.trim().length > 0; }",
                    timeout=25000,
                )
                print(" populated!")
            except Exception as e:
                # Best-effort probe: report the failure and keep going so the
                # dumps below still show whatever did render.
                print(f" TIMEOUT: {e}")

            # Extract values from known IDs
            print("\nValues by element ID:")
            for elid in element_ids:
                try:
                    txt = page.locator(f"#{elid}").inner_text(timeout=2000)
                    print(f" #{elid}: {txt!r}")
                except Exception as e:
                    # A missing element should not stop the remaining lookups.
                    print(f" #{elid}: ERROR {e}")

            # Try get FULL inner text of property page
            print("\n\nFull body text (filtered for non-empty data):")
            body = page.inner_text("body")
            # Find lines with $ or numeric data
            for raw_line in body.split("\n"):
                line = raw_line.strip()
                if not line:
                    continue
                if any(marker in line for marker in markers):
                    print(f" {line[:120]}")

            # Save updated HTML; create the output directory first so a fresh
            # checkout does not fail on the write.
            out_dir.mkdir(parents=True, exist_ok=True)
            (out_dir / "03_after_wait.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "03_after_wait.png"), full_page=True)
        finally:
            # Close the browser even when a step above raises.
            browser.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the probe once when this file is executed as a script.
    probe()
|