74 lines
2.6 KiB
Python
74 lines
2.6 KiB
Python
"""Try various wait strategies for bcpa.net SPA."""
|
|
from playwright.sync_api import sync_playwright
|
|
import time
|
|
|
|
|
|
def _print_numeric_lines(body):
    """Print each short line of *body* that contains at least one digit.

    Lines are stripped first; lines shorter than 3 characters or 200
    characters and longer are skipped. Output is truncated to 150 characters.
    """
    for raw in body.split("\n"):
        line = raw.strip()
        if len(line) < 3 or len(line) >= 200:
            continue
        if any(c.isdigit() for c in line):
            print(f" {line[:150]}")


def _report_elements(page, element_ids):
    """Print the inner text of each ``#<id>`` element, or a marker on failure.

    Each lookup is best-effort with a 1.5 s timeout: a missing or unrendered
    element is reported as ``empty/error`` rather than aborting the probe.
    """
    # Imported locally so the helper is self-contained; the file-level import
    # only brings in ``sync_playwright``.
    from playwright.sync_api import Error as PlaywrightError

    for elid in element_ids:
        try:
            txt = page.locator(f"#{elid}").inner_text(timeout=1500)
        except PlaywrightError:
            # Playwright's TimeoutError is a subclass of Error, so this covers
            # both "never appeared" and other locator failures.
            print(f" #{elid}: empty/error")
        else:
            print(f" #{elid}: {txt!r}")


def probe(folio="484226062150", *, headless=True):
    """Load a Record-Search page and report what has rendered after fixed waits.

    The page is opened in Chromium, given 15 s to render, and then inspected:
    body text length, digit-bearing lines, photo ``<img>`` URLs, and a handful
    of element ids. After a further 10 s the body and two of the ids are
    checked again. Everything is printed to stdout; nothing is returned.

    Args:
        folio: Folio number substituted into the Record-Search URL.
        headless: Whether to launch Chromium headless. Pass ``False`` to check
            whether the site behaves differently for a headed browser.
    """
    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131",
                viewport={"width": 1400, "height": 900},
            )
            page = ctx.new_page()

            print(f"[1] Loading {url} (domcontentloaded)")
            page.goto(url, wait_until="domcontentloaded", timeout=20000)
            print(" loaded; sleeping 15s for SPA to render...")
            # wait_for_timeout (milliseconds) instead of time.sleep: the sync
            # API relies on internal async processing that time.sleep blocks.
            page.wait_for_timeout(15_000)

            # Check what's actually on screen now
            body = page.inner_text("body")
            print(f"\n[2] Body length: {len(body)}")
            print("\n[3] Body sample (lines with numbers or $):")
            _print_numeric_lines(body)

            # Look for photos: images under /Photographs/ wider than 200px
            photos = page.evaluate(
                "Array.from(document.querySelectorAll('img'))"
                ".filter(i => i.src.includes('/Photographs/') && i.naturalWidth > 200)"
                ".map(i => i.src)"
            )
            print(f"\n[4] Photos found via JS query: {len(photos)}")
            for p_url in photos[:3]:
                print(f" {p_url}")

            # Check the element ids now
            _report_elements(
                page,
                ["actualAgeId", "effectiveAgeId", "currentTaxYearMobileId", "lastTaxYearMobileId"],
            )

            # Try waiting longer
            print("\n[5] Sleeping another 10s and re-checking...")
            page.wait_for_timeout(10_000)
            _report_elements(page, ["actualAgeId", "currentTaxYearMobileId"])

            body2 = page.inner_text("body")
            print(f"\n[6] Body length after total 25s: {len(body2)}")
            # A body over 5000 characters is taken as a sign that data appeared
            if len(body2) > 5000:
                print(f"\n Visible data: {body2[2000:4500]}")
        finally:
            # Close the browser even if navigation or a query above raised.
            browser.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the probe only when this file is executed as a script, not on import.
    probe()
|