feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
"""Try various wait strategies for bcpa.net SPA."""
|
||||
from playwright.sync_api import sync_playwright
|
||||
import time
|
||||
|
||||
|
||||
def _print_numeric_lines(body: str) -> None:
    """Print the short lines of *body* that contain a digit or a ``$``.

    Lines are stripped first; lines shorter than 3 characters or 200
    characters and longer are skipped. Each printed line is truncated to
    150 characters.
    """
    for raw in body.split("\n"):
        line = raw.strip()
        if len(line) < 3 or len(line) >= 200:
            continue
        # The section header promises "numbers or $", so accept both.
        if any(c.isdigit() or c == "$" for c in line):
            print(f" {line[:150]}")


def _print_element_texts(page, element_ids) -> None:
    """Print the inner text of each ``#<id>`` element on *page*."""
    for elid in element_ids:
        try:
            txt = page.locator(f"#{elid}").inner_text(timeout=1500)
        except Exception:
            # Deliberately best-effort: a missing or not-yet-rendered element
            # is an expected probe outcome, so report it and keep going.
            print(f" #{elid}: empty/error")
        else:
            print(f" #{elid}: {txt!r}")


def probe(folio: str = "484226062150", headless: bool = True) -> None:
    """Load the bcpa.net Record-Search page for *folio* and print what rendered.

    The page is opened in Chromium, given 15 seconds to render, and then
    inspected: body text length, body lines containing digits or ``$``,
    ``<img>`` URLs containing ``/Photographs/`` whose natural width exceeds
    200, and the text of a few element ids. After a further 10 seconds the
    element ids and body length are checked again.

    Args:
        folio: Value substituted into the ``folio`` query parameter of the URL.
        headless: Passed to ``chromium.launch``. Use ``False`` to check
            whether the site behaves differently for a headless browser.
    """
    url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131",
                viewport={"width": 1400, "height": 900},
            )
            page = ctx.new_page()

            print(f"[1] Loading {url} (domcontentloaded)")
            page.goto(url, wait_until="domcontentloaded", timeout=20000)
            print(" loaded; sleeping 15s for SPA to render...")
            # Use Playwright's own wait instead of time.sleep so the sync API
            # keeps processing its internal events while we wait.
            page.wait_for_timeout(15_000)

            # Check what's actually on screen now
            body = page.inner_text("body")
            print(f"\n[2] Body length: {len(body)}")
            print("\n[3] Body sample (lines with numbers or $):")
            _print_numeric_lines(body)

            # Look for photos
            photos = page.evaluate(
                "Array.from(document.querySelectorAll('img'))"
                ".filter(i => i.src.includes('/Photographs/') && i.naturalWidth > 200)"
                ".map(i => i.src)"
            )
            print(f"\n[4] Photos found via JS query: {len(photos)}")
            for p_url in photos[:3]:
                print(f" {p_url}")

            # Check the element ids now
            _print_element_texts(
                page,
                ["actualAgeId", "effectiveAgeId", "currentTaxYearMobileId", "lastTaxYearMobileId"],
            )

            # Try waiting longer
            print("\n[5] Sleeping another 10s and re-checking...")
            page.wait_for_timeout(10_000)
            _print_element_texts(page, ["actualAgeId", "currentTaxYearMobileId"])

            body2 = page.inner_text("body")
            print(f"\n[6] Body length after total 25s: {len(body2)}")
            # A large body suggests data appeared; dump a slice of it.
            if len(body2) > 5000:
                print(f"\n Visible data: {body2[2000:4500]}")
        finally:
            # Close the browser even when navigation or a query raises.
            browser.close()
|
||||
|
||||
|
||||
# Script entry point: run the probe only when executed directly, not on import.
if __name__ == "__main__":
    probe()
|
||||
Reference in New Issue
Block a user