Files
AR-House/scripts/diag_civitek_results.py
2026-07-03 12:24:58 -04:00

106 lines
4.1 KiB
Python

"""Diagnose Civitek results page structure — save HTML + look for tables."""
from pathlib import Path
def diag():
    """Run one business-name search on the Civitek OCRS site and dump diagnostics.

    The function drives a headless Chromium session through the landing page
    (clicking the "Public" and "I Agree" buttons), fills the business-name
    search field with ``'BANK OF AMERICA'``, submits the search, and then
    prints information about the resulting page: URL, body text, keyword
    hits, table counts, an HTML snippet around the first ``ui-datatable``
    reference, and every POST request captured along the way.

    Side effects:
        Writes ``07_business_search_results.html`` and ``07_results.png``
        into ``<repo>/_probe_out/civitek`` (the directory is created if it
        does not exist) and prints the diagnostics to stdout.

    Returns:
        None.
    """
    import re

    # Imported lazily so that merely importing this module does not require
    # Playwright to be installed.
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "civitek"
    # Create the output directory up front: otherwise the whole browser
    # walk-through runs and only then fails when the HTML dump is written.
    out_dir.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context().new_page()

            # Capture POST requests to see exactly what gets submitted
            captured_posts: list[dict] = []

            def on_request(req):
                if req.method == "POST":
                    try:
                        body = req.post_data or ""
                    except Exception:
                        # Deliberate best-effort: a body that cannot be read
                        # must not abort the diagnostic run.
                        body = ""
                    captured_posts.append({"url": req.url, "body": body[:3000]})

            page.on("request", on_request)

            # Walk through the landing page and the two click-through prompts.
            # Fixed sleeps are used between steps instead of waiting on
            # specific selectors.
            page.goto("https://www.civitekflorida.com/ocrs/county/27/")
            page.wait_for_timeout(1500)
            page.locator("button:has-text('Public')").first.click()
            page.wait_for_timeout(2500)
            page.locator("button:has-text('I Agree')").first.click()
            page.wait_for_timeout(2500)

            # Set value via JS with proper events that JSF listens to
            page.evaluate("""
                const inp = document.getElementById('form:search_tab:businessname');
                inp.focus();
                inp.value = 'BANK OF AMERICA';
                inp.dispatchEvent(new Event('input', { bubbles: true }));
                inp.dispatchEvent(new Event('change', { bubbles: true }));
                inp.dispatchEvent(new Event('blur', { bubbles: true }));
            """)
            page.wait_for_timeout(500)
            val_after = page.locator("#form\\:search_tab\\:businessname").input_value()
            print(f"businessname value (via JS): {val_after!r}")

            # Find Search button — there's only one (j_idt1095) but ID is unstable.
            # Better: query by JS by text + type=submit
            btn_id = page.evaluate("""
                () => {
                    const btns = Array.from(document.querySelectorAll('button[type=submit]'));
                    const m = btns.find(b => b.innerText.trim() === 'Search');
                    return m ? m.id : null;
                }
            """)
            print(f"Search button id detected: {btn_id!r}")
            if btn_id:
                # Click via JS to avoid CSS selector escaping issues with ":".
                # The id is passed as an argument rather than spliced into the
                # script text, so quotes in the id cannot break the script.
                page.evaluate("id => document.getElementById(id).click()", btn_id)
            else:
                search = page.locator("button:has(.ui-button-text:text-is('Search'))").first
                search.click()

            # Wait extra long
            page.wait_for_timeout(12000)

            print(f"URL: {page.url}")
            body = page.inner_text("body")
            print(f"\nBody length: {len(body)}")
            print(f"\nFirst 3000 chars of body:\n{body[:3000]}")

            # Check for "records" text indicating count. Only the first hit
            # per keyword is reported, with up to 80 characters of same-line
            # context on either side.
            for kw in ["records", "found", "match", "search", "no result", "displaying"]:
                hit = re.search(rf".{{0,80}}{re.escape(kw)}.{{0,80}}", body, re.IGNORECASE)
                if hit:
                    print(f"\n  Match '{kw}': {hit.group(0).strip()}")

            # Look for all tables
            print(f"\n\nTables on page: {page.locator('table').count()}")
            print(f"DataTables (.ui-datatable): {page.locator('.ui-datatable').count()}")
            print(f"Data grids (role=grid): {page.locator('[role=grid]').count()}")

            # Save full HTML
            full_html = page.content()
            (out_dir / "07_business_search_results.html").write_text(full_html, encoding="utf-8")
            page.screenshot(path=str(out_dir / "07_results.png"), full_page=True)

            # Print snippet around any "datatable" reference.
            # str.find returns -1 when the substring is absent; 0 is a valid hit.
            idx = full_html.lower().find("ui-datatable")
            if idx != -1:
                print(f"\nHTML around 'ui-datatable':\n{full_html[idx:idx+800]}")

            # Print captured POSTs
            print(f"\n\n===== Captured POST requests ({len(captured_posts)}) =====")
            for i, post in enumerate(captured_posts):
                print(f"\n[{i}] URL: {post['url']}")
                print(f"    BODY ({len(post['body'])} chars): {post['body'][:1000]}")
        finally:
            # Always release the browser, even when a step above raised.
            browser.close()
# Script entry point: run the diagnostic only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    diag()