feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
"""Wait longer + extract values from BCPA via specific element IDs."""
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
|
||||
def probe(folio: str = "484226062150") -> None:
    """Load a BCPA record page, wait for it to render, and dump what it shows.

    Opens the Record-Search URL for ``folio`` in headless Chromium, waits up to
    25 seconds for the ``#actualAgeId`` element to contain text, then:

    * prints the inner text of a fixed set of element IDs,
    * prints every non-empty body line containing ``$`` or one of a few
      upper-case keywords (truncated to 120 characters),
    * writes the page HTML and a full-page screenshot to ``_probe_out/bcpa``
      located one directory above this file's directory.

    Failures while waiting or while reading an individual element are printed
    and do not abort the probe.

    Args:
        folio: Folio number interpolated into the search URL. Defaults to the
            value this probe was originally hard-coded with.
    """
    from pathlib import Path

    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"

    # Element IDs whose text is dumped one by one.
    element_ids = (
        "actualAgeId", "effectiveAgeId", "currentTaxYearMobileId",
        "lastTaxYearMobileId", "lastTwoTaxYearMobileId",
    )
    # Substrings that mark a body line as worth printing.
    markers = ("$", "ASSESS", "OWNER", "PROPERTY", "TAX", "SALE", "DEED", "YEAR")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
            ).new_page()

            print(f"Loading {url}")
            page.goto(url, wait_until="domcontentloaded", timeout=30000)

            # Wait for Angular SPA — explicitly wait for actualAgeId div to have text
            print("Waiting for actualAgeId div to populate...")
            try:
                page.wait_for_function(
                    "() => { const el = document.getElementById('actualAgeId'); return el && el.textContent.trim().length > 0; }",
                    timeout=25000,
                )
                print(" populated!")
            except Exception as e:
                # Best-effort: keep going so the dumps below still show what
                # the page looked like when the wait failed.
                print(f" TIMEOUT: {e}")

            # Extract values from known IDs
            print("\nValues by element ID:")
            for elid in element_ids:
                try:
                    txt = page.locator(f"#{elid}").inner_text(timeout=2000)
                    print(f" #{elid}: {txt!r}")
                except Exception as e:
                    # A missing element should not stop the remaining lookups.
                    print(f" #{elid}: ERROR {e}")

            # Try get FULL inner text of property page
            print("\n\nFull body text (filtered for non-empty data):")
            body = page.inner_text("body")
            # Find lines with $ or numeric data
            for raw_line in body.split("\n"):
                line = raw_line.strip()
                if not line:
                    continue
                if any(s in line for s in markers):
                    print(f" {line[:120]}")

            # Save updated HTML
            out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa"
            # Create the directory first; writing into a missing directory
            # would raise FileNotFoundError.
            out_dir.mkdir(parents=True, exist_ok=True)
            (out_dir / "03_after_wait.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "03_after_wait.png"), full_page=True)
        finally:
            # Close the browser even when a step above raised.
            browser.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the probe only when this file is executed directly as a script.
    probe()
|
||||
Reference in New Issue
Block a user