"""Wait 25s + dump all element IDs that have text content.""" from playwright.sync_api import sync_playwright from pathlib import Path import time def probe(): folio = "484226062150" url = f"https://web.bcpa.net/bcpaclient/#/Record-Search?folio={folio}" out_dir = Path(__file__).parent.parent / "_probe_out" / "bcpa" with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_context( user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/131" ).new_page() page.goto(url, wait_until="domcontentloaded", timeout=20000) time.sleep(25) # Save full rendered HTML (out_dir / "FINAL_rendered.html").write_text(page.content(), encoding="utf-8") page.screenshot(path=str(out_dir / "FINAL_rendered.png"), full_page=True) # Extract ALL elements with id attribute that have text content elements = page.evaluate(""" () => { const out = []; const all = document.querySelectorAll('[id]'); for (const el of all) { const txt = (el.textContent || '').trim(); // Only collect leaf-like elements with reasonable text if (txt && txt.length < 300 && el.children.length < 3) { // Find the closest visible label (preceding sibling td.lblRecinfoNew or label) let label = ''; const parent = el.closest('tr, div.row, p, .info-row'); if (parent) { const labelEl = parent.querySelector('.lblRecinfoNew, .searchTblCategory, .searchTblCategory2, label, .info-label'); if (labelEl) label = (labelEl.textContent || '').trim().substring(0, 80); } out.push({id: el.id, text: txt.substring(0, 200), label: label}); } } return out; } """) print(f"Elements with text content: {len(elements)}\n") for e in elements: label = e['label'][:50] if e['label'] else '' print(f" #{e['id']:35s} [{label[:30]:30s}] = {e['text'][:80]!r}") # Also extract table data — populated cells print("\n\n===== TABLE DATA (rows with non-empty cells) =====") tables_data = page.evaluate(""" () => { const out = []; const tables = document.querySelectorAll('table'); tables.forEach((tbl, idx) => { const rows = []; for (const tr of tbl.querySelectorAll('tr')) { const cells = []; for (const c of tr.querySelectorAll('td, th')) { cells.push((c.textContent || '').trim()); } if (cells.some(c => c && c.length > 0)) { rows.push(cells); } } if (rows.length > 0) { // First row sometimes has the table identifier out.push({idx, rows: rows.slice(0, 15)}); } }); return out; } """) for t in tables_data: if not t["rows"]: continue first_row = " | ".join(t["rows"][0][:6])[:120] print(f"\n--- Table {t['idx']} ({len(t['rows'])} rows) ---") print(f" Header/R0: {first_row}") for r in t["rows"][1:6]: line = " | ".join(c[:35] for c in r[:6])[:140] print(f" R: {line}") browser.close() if __name__ == "__main__": probe()