106 lines
4.1 KiB
Python
106 lines
4.1 KiB
Python
"""Diagnose Civitek results page structure — save HTML + look for tables."""
|
|
from pathlib import Path
|
|
|
|
|
|
def diag() -> None:
    """Walk the Civitek business-name search and dump what the results page contains.

    The function drives a headless Chromium session through the county 27
    landing page, clicks the 'Public' and 'I Agree' buttons, fills the
    business-name field with 'BANK OF AMERICA', submits the search and then
    prints diagnostics to stdout: the final URL, the start of the body text,
    the first line matching each of a few result-related keywords, table
    counts, and every POST request captured along the way.

    Side effects:
        Writes ``07_business_search_results.html`` and ``07_results.png`` into
        ``<parent of this file's directory>/_probe_out/civitek``; the directory
        is created if it does not exist.

    Raises:
        Whatever Playwright raises when a navigation, locator or evaluate step
        fails; the browser is still closed in that case.
    """
    # Local imports, matching the original layout: Playwright is only needed
    # when the diagnostic is actually run.
    import re

    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "civitek"
    # write_text() / screenshot() below do not create missing directories.
    out_dir.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context().new_page()

            # Capture POST requests to see exactly what gets submitted
            captured_posts: list[dict[str, str]] = []

            def on_request(req) -> None:
                """Record the URL and (truncated) body of every POST request."""
                if req.method != "POST":
                    return
                try:
                    body = req.post_data or ""
                except Exception:
                    # Best effort: a body that cannot be read is logged as empty
                    # rather than aborting the whole diagnostic run.
                    body = ""
                captured_posts.append({"url": req.url, "body": body[:3000]})

            page.on("request", on_request)

            # Walk through
            page.goto("https://www.civitekflorida.com/ocrs/county/27/")
            page.wait_for_timeout(1500)
            page.locator("button:has-text('Public')").first.click()
            page.wait_for_timeout(2500)
            page.locator("button:has-text('I Agree')").first.click()
            page.wait_for_timeout(2500)

            # Set value via JS with proper events that JSF listens to
            page.evaluate("""
                const inp = document.getElementById('form:search_tab:businessname');
                inp.focus();
                inp.value = 'BANK OF AMERICA';
                inp.dispatchEvent(new Event('input', { bubbles: true }));
                inp.dispatchEvent(new Event('change', { bubbles: true }));
                inp.dispatchEvent(new Event('blur', { bubbles: true }));
            """)
            page.wait_for_timeout(500)
            val_after = page.locator("#form\\:search_tab\\:businessname").input_value()
            print(f"businessname value (via JS): {val_after!r}")

            # Find Search button — there's only one (j_idt1095) but ID is unstable.
            # Better: query by JS by text + type=submit
            btn_id = page.evaluate("""
                () => {
                    const btns = Array.from(document.querySelectorAll('button[type=submit]'));
                    const m = btns.find(b => b.innerText.trim() === 'Search');
                    return m ? m.id : null;
                }
            """)
            print(f"Search button id detected: {btn_id!r}")
            if btn_id:
                # Click via JS to avoid CSS selector escaping issues with ":".
                # The id is passed as an argument instead of being spliced into
                # the script text, so quotes in the id cannot break the JS.
                page.evaluate("id => document.getElementById(id).click()", btn_id)
            else:
                search = page.locator("button:has(.ui-button-text:text-is('Search'))").first
                search.click()

            # Wait extra long
            page.wait_for_timeout(12000)
            print(f"URL: {page.url}")

            body = page.inner_text("body")
            print(f"\nBody length: {len(body)}")
            print(f"\nFirst 3000 chars of body:\n{body[:3000]}")

            # Check for "records" text indicating count: print the first hit
            # for each keyword with up to 80 characters of same-line context
            # on either side.
            for kw in ["records", "found", "match", "search", "no result", "displaying"]:
                hit = re.search(rf".{{0,80}}{kw}.{{0,80}}", body, re.IGNORECASE)
                if hit:
                    print(f"\n Match '{kw}': {hit.group(0).strip()}")

            # Look for all tables
            print(f"\n\nTables on page: {page.locator('table').count()}")
            print(f"DataTables (.ui-datatable): {page.locator('.ui-datatable').count()}")
            print(f"Data grids (role=grid): {page.locator('[role=grid]').count()}")

            # Save full HTML
            full_html = page.content()
            (out_dir / "07_business_search_results.html").write_text(full_html, encoding="utf-8")
            page.screenshot(path=str(out_dir / "07_results.png"), full_page=True)

            # Print snippet around any "datatable" reference
            idx = full_html.lower().find("ui-datatable")
            if idx != -1:  # str.find() signals "not found" with -1, not 0
                print(f"\nHTML around 'ui-datatable':\n{full_html[idx:idx+800]}")

            # Print captured POSTs
            print(f"\n\n===== Captured POST requests ({len(captured_posts)}) =====")
            for i, post in enumerate(captured_posts):
                print(f"\n[{i}] URL: {post['url']}")
                print(f" BODY ({len(post['body'])} chars): {post['body'][:1000]}")
        finally:
            # Close the browser even when a step above raises.
            browser.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the diagnostic once when executed directly.
    diag()
|