"""Diagnose Civitek results page structure — save HTML + look for tables.""" from pathlib import Path def diag(): from playwright.sync_api import sync_playwright out_dir = Path(__file__).parent.parent / "_probe_out" / "civitek" with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_context().new_page() # Capture POST requests to see exactly what gets submitted captured_posts: list[dict] = [] def on_request(req): if req.method == "POST": try: body = req.post_data or "" except Exception: body = "" captured_posts.append({"url": req.url, "body": body[:3000]}) page.on("request", on_request) # Walk through page.goto("https://www.civitekflorida.com/ocrs/county/27/") page.wait_for_timeout(1500) page.locator("button:has-text('Public')").first.click() page.wait_for_timeout(2500) page.locator("button:has-text('I Agree')").first.click() page.wait_for_timeout(2500) # Set value via JS with proper events that JSF listens to page.evaluate(""" const inp = document.getElementById('form:search_tab:businessname'); inp.focus(); inp.value = 'BANK OF AMERICA'; inp.dispatchEvent(new Event('input', { bubbles: true })); inp.dispatchEvent(new Event('change', { bubbles: true })); inp.dispatchEvent(new Event('blur', { bubbles: true })); """) page.wait_for_timeout(500) val_after = page.locator("#form\\:search_tab\\:businessname").input_value() print(f"businessname value (via JS): {val_after!r}") # Find Search button — there's only one (j_idt1095) but ID is unstable. # Better: query by JS by text + type=submit btn_id = page.evaluate(""" () => { const btns = Array.from(document.querySelectorAll('button[type=submit]')); const m = btns.find(b => b.innerText.trim() === 'Search'); return m ? m.id : null; } """) print(f"Search button id detected: {btn_id!r}") if btn_id: # Click via JS to avoid CSS selector escaping issues with ":" page.evaluate(f"document.getElementById('{btn_id}').click()") else: search = page.locator("button:has(.ui-button-text:text-is('Search'))").first search.click() # Wait extra long page.wait_for_timeout(12000) print(f"URL: {page.url}") body = page.inner_text("body") print(f"\nBody length: {len(body)}") print(f"\nFirst 3000 chars of body:\n{body[:3000]}") # Check for "records" text indicating count for kw in ["records", "found", "match", "search", "no result", "displaying"]: import re for m in re.finditer(rf".{{0,80}}{kw}.{{0,80}}", body, re.IGNORECASE): t = m.group(0).strip() if t and len(t) < 200: print(f"\n Match '{kw}': {t}") break # Look for all tables print(f"\n\nTables on page: {page.locator('table').count()}") print(f"DataTables (.ui-datatable): {page.locator('.ui-datatable').count()}") print(f"Data grids (role=grid): {page.locator('[role=grid]').count()}") # Save full HTML full_html = page.content() (out_dir / "07_business_search_results.html").write_text(full_html, encoding="utf-8") page.screenshot(path=str(out_dir / "07_results.png"), full_page=True) # Print snippet around any "datatable" reference idx = full_html.lower().find("ui-datatable") if idx > 0: print(f"\nHTML around 'ui-datatable':\n{full_html[idx:idx+800]}") # Print captured POSTs print(f"\n\n===== Captured POST requests ({len(captured_posts)}) =====") for i, p in enumerate(captured_posts): print(f"\n[{i}] URL: {p['url']}") print(f" BODY ({len(p['body'])} chars): {p['body'][:1000]}") browser.close() if __name__ == "__main__": diag()