"""Probe Civitek search results structure with a real query.""" from __future__ import annotations from pathlib import Path def probe(): from playwright.sync_api import sync_playwright out_dir = Path(__file__).parent.parent / "_probe_out" / "civitek" out_dir.mkdir(parents=True, exist_ok=True) with sync_playwright() as p: browser = p.chromium.launch(headless=True) ctx = browser.new_context(user_agent="Mozilla/5.0 Chrome/120") page = ctx.new_page() # Walk through to search form page.goto("https://www.civitekflorida.com/ocrs/county/27/") page.wait_for_timeout(1500) page.locator("button:has-text('Public')").first.click() page.wait_for_timeout(2500) page.locator("button:has-text('I Agree')").first.click() page.wait_for_timeout(2500) print(f"[1] Search page URL: {page.url}") # Fill search: business name (most foreclosure plaintiffs are entities) page.fill("#form\\:search_tab\\:businessname", "BANK OF AMERICA") # Skip case-type filter for first test (no checkbox click) # Submit — button id is auto-generated, use text print("[3] Clicking Search button (by text)...") # PrimeFaces buttons render as button[type=submit] with ui-button-text search_btn = page.locator("button:has(.ui-button-text:text-is('Search'))").first if search_btn.count() == 0: search_btn = page.locator("button:has-text('Search')").first print(f" Search button visible: {search_btn.is_visible()}") search_btn.click() page.wait_for_timeout(6000) print(f"[4] After submit URL: {page.url}") (out_dir / "04_results.html").write_text(page.content(), encoding="utf-8") page.screenshot(path=str(out_dir / "04_results.png"), full_page=True) # Look for results table print("\n[5] Tables on results page:") tables = page.locator("table").all() for i, tbl in enumerate(tables[:8]): try: rows = tbl.locator("tr").count() cols = tbl.locator("tr").first.locator("td, th").count() if rows > 0 else 0 role = tbl.get_attribute("role") or "" tbl_id = tbl.get_attribute("id") or "" # Skip empty layout tables if rows < 1: continue print(f" [{i}] id={tbl_id!r} role={role!r} rows={rows} cols={cols}") # First header row if rows > 0: headers = tbl.locator("tr").first.locator("th, td").all() hdr_texts = [(h.inner_text() or "").strip()[:25] for h in headers[:10]] print(f" headers: {hdr_texts}") # First data row (skip header) if rows > 1: row1 = tbl.locator("tr").nth(1).locator("td").all() row1_texts = [(c.inner_text() or "").strip()[:30] for c in row1[:10]] print(f" row1: {row1_texts}") except Exception as e: print(f" [{i}] error: {e}") # Look for messages (no results, errors) print("\n[6] Messages on page:") msgs = page.locator(".ui-messages-error, .ui-messages-warn, .ui-messages-info, .ui-message").all() for m in msgs[:10]: try: txt = (m.inner_text() or "").strip()[:200] if txt: print(f" msg: {txt}") except Exception: pass # Look for links to case details print("\n[7] Case detail links (first 5):") case_links = page.locator("a[href*='case'], a[href*='detail']").all()[:5] for a in case_links: try: txt = (a.inner_text() or "").strip()[:50] href = a.get_attribute("href") or "" print(f" {txt!r} → {href[:100]}") except Exception: pass # Pagination print("\n[8] Pagination indicators:") for txt in ["of ", "Page ", "Next", "records"]: loc = page.locator(f"text=/{txt}/") if loc.count() > 0: try: t = loc.first.inner_text()[:80] print(f" '{txt}' → {t!r}") except Exception: pass print(f"\n[OK] saved to {out_dir}/") browser.close() if __name__ == "__main__": probe()