feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+107
View File
@@ -0,0 +1,107 @@
"""Probe Civitek search results structure with a real query."""
from __future__ import annotations
from pathlib import Path
def _cell_texts(cells, limit, width):
    """Return the stripped inner text of the first *limit* cells, cut to *width* chars."""
    return [(cell.inner_text() or "").strip()[:width] for cell in cells[:limit]]


def _report_tables(page, max_tables=8):
    """Print id, role, size, header row and first data row of the first tables on *page*."""
    print("\n[5] Tables on results page:")
    for i, tbl in enumerate(page.locator("table").all()[:max_tables]):
        try:
            rows = tbl.locator("tr")
            row_count = rows.count()
            # Skip empty layout tables before issuing any further queries.
            if row_count < 1:
                continue
            cols = rows.first.locator("td, th").count()
            role = tbl.get_attribute("role") or ""
            tbl_id = tbl.get_attribute("id") or ""
            print(f" [{i}] id={tbl_id!r} role={role!r} rows={row_count} cols={cols}")
            # First row is treated as the header row.
            headers = rows.first.locator("th, td").all()
            print(f" headers: {_cell_texts(headers, 10, 25)}")
            # First data row (the one right after the header), if any.
            if row_count > 1:
                row1 = rows.nth(1).locator("td").all()
                print(f" row1: {_cell_texts(row1, 10, 30)}")
        except Exception as e:
            # Best-effort probe: report the failure and move on to the next table.
            print(f" [{i}] error: {e}")


def _report_messages(page, max_messages=10):
    """Print the non-empty PrimeFaces message elements (errors, warnings, info) on *page*."""
    print("\n[6] Messages on page:")
    selector = ".ui-messages-error, .ui-messages-warn, .ui-messages-info, .ui-message"
    for m in page.locator(selector).all()[:max_messages]:
        try:
            txt = (m.inner_text() or "").strip()[:200]
            if txt:
                print(f" msg: {txt}")
        except Exception as e:
            # Surface the failure instead of hiding it; the probe keeps going.
            print(f" msg error: {e}")


def _report_case_links(page, max_links=5):
    """Print text and href of the first anchors whose href contains 'case' or 'detail'."""
    print("\n[7] Case detail links (first 5):")
    for a in page.locator("a[href*='case'], a[href*='detail']").all()[:max_links]:
        try:
            txt = (a.inner_text() or "").strip()[:50]
            href = a.get_attribute("href") or ""
            print(f" {txt!r}{href[:100]}")
        except Exception as e:
            print(f" link error: {e}")


def _report_pagination(page):
    """Print the first element whose text matches each pagination-related keyword."""
    print("\n[8] Pagination indicators:")
    for txt in ["of ", "Page ", "Next", "records"]:
        # `text=/.../` is Playwright's regex text selector; keywords contain no metacharacters.
        loc = page.locator(f"text=/{txt}/")
        if loc.count() > 0:
            try:
                t = loc.first.inner_text()[:80]
                print(f" '{txt}'{t!r}")
            except Exception as e:
                print(f" '{txt}' error: {e}")


def probe() -> None:
    """Run a real business-name search on the Civitek OCRS site and dump what comes back.

    Walks through the "Public" and "I Agree" buttons to reach the search form,
    searches for "BANK OF AMERICA", then saves the results page as HTML and a
    full-page screenshot under ``<parent of this file's dir>/_probe_out/civitek``
    and prints a summary of tables, messages, case-detail links and pagination
    indicators to stdout.

    Errors while inspecting individual elements are printed and skipped;
    errors during navigation or form submission propagate to the caller after
    the browser has been closed.
    """
    # Imported lazily so merely importing this module does not require Playwright.
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "civitek"
    out_dir.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            ctx = browser.new_context(user_agent="Mozilla/5.0 Chrome/120")
            page = ctx.new_page()

            # Walk through to search form
            page.goto("https://www.civitekflorida.com/ocrs/county/27/")
            page.wait_for_timeout(1500)
            page.locator("button:has-text('Public')").first.click()
            page.wait_for_timeout(2500)
            page.locator("button:has-text('I Agree')").first.click()
            page.wait_for_timeout(2500)
            print(f"[1] Search page URL: {page.url}")

            # Fill search: business name (most foreclosure plaintiffs are entities)
            page.fill("#form\\:search_tab\\:businessname", "BANK OF AMERICA")
            # Skip case-type filter for first test (no checkbox click)

            # Submit — button id is auto-generated, so locate it by its text.
            print("[3] Clicking Search button (by text)...")
            # PrimeFaces buttons render as button[type=submit] with ui-button-text
            search_btn = page.locator("button:has(.ui-button-text:text-is('Search'))").first
            if search_btn.count() == 0:
                # Fall back to a looser text match when the exact-text selector finds nothing.
                search_btn = page.locator("button:has-text('Search')").first
            print(f" Search button visible: {search_btn.is_visible()}")
            search_btn.click()
            page.wait_for_timeout(6000)
            print(f"[4] After submit URL: {page.url}")

            # Persist the raw results for offline inspection.
            (out_dir / "04_results.html").write_text(page.content(), encoding="utf-8")
            page.screenshot(path=str(out_dir / "04_results.png"), full_page=True)

            _report_tables(page)
            _report_messages(page)
            _report_case_links(page)
            _report_pagination(page)

            print(f"\n[OK] saved to {out_dir}/")
        finally:
            # Always release the browser, even when a navigation step fails.
            browser.close()
# Script entry point: run the probe only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    probe()