feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,456 @@
|
||||
"""civitek_ocrs.py — Court records adapter for 33 FL counties on Civitek OCRS platform.
|
||||
|
||||
Civitek OCRS (Online Court Records Search) is the JSF/PrimeFaces platform
shared by 33 FL counties. A single implementation covers all of them.
|
||||
|
||||
COUNTIES COVERED (33):
|
||||
Baker, Bradford, Calhoun, Columbia, DeSoto, Dixie, Franklin, Gilchrist,
|
||||
Glades, Gulf, Hamilton, Hardee, Hendry, Hernando, Highlands, Holmes,
|
||||
Jackson, Jefferson, Lafayette, Levy, Liberty, Madison, Marion, Nassau,
|
||||
Okeechobee, Pasco, Putnam, Santa Rosa, Sumter, Union, Wakulla, Walton,
|
||||
Washington.
|
||||
|
||||
NOT INCLUDED (use other adapters):
|
||||
Indian River, Brevard, Volusia, Lake, Citrus, Flagler, Charlotte, Manatee,
|
||||
Sarasota, Polk, Osceola, Seminole, Alachua, Bay, Escambia, Leon, Monroe,
|
||||
Collier, Lee, St. Lucie, Martin, St. Johns, Clay, Duval, Orange, Pinellas,
|
||||
Hillsborough, Miami-Dade, Broward, Palm Beach, Suwannee, Taylor.
|
||||
|
||||
USAGE:
|
||||
from data_fetchers.civitek_ocrs import fetch_civitek_court_records
|
||||
result = fetch_civitek_court_records(
|
||||
county_name="Hernando",
|
||||
case_number="2024-CA-001234",
|
||||
)
|
||||
# → {status, case_data, lis_pendens, sources_used, source_url, errors, ...}
|
||||
|
||||
TECHNICAL NOTES:
|
||||
- Uses Playwright headless Chromium (free, ~$0 cost per query)
|
||||
- Civitek is PrimeFaces/JSF stateful — needs full browser, not curl/requests
|
||||
- Auto-generated DOM ids (j_idt*) change per session — we use text selectors
|
||||
- Field ids bound to managed beans (search_tab:lastname, search_tab:year) ARE stable
|
||||
- Per-query latency: ~6-10s (entry → disclaimer → tab switch → search → parse)
|
||||
- Rate limit: not stated by Civitek — intended pace is 1 req/2s; this module only adds fixed in-page waits, no explicit throttle between queries
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# COUNTY CODE MAPPING (Civitek 2-digit codes)
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
CIVITEK_COUNTY_CODES: dict[str, str] = {
    # Format: "County Name (canonical)": "NN" (2-digit Civitek code).
    # Codes are kept as strings so the leading zero survives when the code
    # is interpolated into the OCRS URL.
    "Baker": "02",
    "Bradford": "04",
    "Calhoun": "07",
    "Columbia": "12",
    "DeSoto": "14",
    "Dixie": "15",
    "Franklin": "19",
    "Gilchrist": "21",
    "Glades": "22",
    "Gulf": "23",
    "Hamilton": "24",
    "Hardee": "25",
    "Hendry": "26",
    "Hernando": "27",
    "Highlands": "28",
    "Holmes": "30",
    "Jackson": "32",
    "Jefferson": "33",
    "Lafayette": "34",
    "Levy": "38",
    "Liberty": "39",
    "Madison": "40",
    "Marion": "42",
    "Nassau": "45",
    "Okeechobee": "47",
    "Pasco": "51",
    "Putnam": "54",
    "Santa Rosa": "57",
    "Sumter": "60",
    "Union": "63",
    "Wakulla": "65",
    "Walton": "66",
    "Washington": "67",
}

# The word "county" (any letter case) preceded by whitespace, e.g. the
# suffix in "Hernando County" or "PASCO COUNTY".
_COUNTY_WORD_RE = re.compile(r"\s+county\b", re.IGNORECASE)


def _canonical_county_name(county_name: Optional[str]) -> Optional[str]:
    """Map free-form county text to its key in CIVITEK_COUNTY_CODES.

    Surrounding whitespace, a " County" suffix and letter case are ignored.
    Returns None for empty input or a county that is not in the mapping.
    """
    if not county_name:
        return None
    without_suffix = _COUNTY_WORD_RE.sub("", county_name.strip())
    # Collapse inner whitespace so "Santa  Rosa" still matches "Santa Rosa".
    wanted = " ".join(without_suffix.split()).casefold()
    for canonical in CIVITEK_COUNTY_CODES:
        if canonical.casefold() == wanted:
            return canonical
    return None


def is_civitek_county(county_name: Optional[str]) -> bool:
    """Return True if the county is one of the 33 FL counties on Civitek.

    Accepts names with or without a " County" suffix, in any letter case.
    None and empty strings return False.
    """
    return _canonical_county_name(county_name) is not None


def civitek_code_for(county_name: str) -> Optional[str]:
    """Return the Civitek 2-digit code for a county name, or None.

    Uses the same tolerant matching as is_civitek_county, and returns None
    (instead of raising) when county_name is None or empty.
    """
    canonical = _canonical_county_name(county_name)
    if canonical is None:
        return None
    return CIVITEK_COUNTY_CODES[canonical]
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# CASE NUMBER PARSER
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Real FL case numbers come in many shapes. Civitek wants (year, sequence) separately.
|
||||
# Common formats observed in realauction.com deals:
|
||||
# "2024-CA-001234"
|
||||
# "23-2024-CA-001234"
|
||||
# "2024CA001234"
|
||||
# "2024-001234-CA"
|
||||
# "27-2024-CA-001234" (court code prefix)
|
||||
# Two-letter court-type codes recognised inside a case number.
_CASE_TYPE_ALT = r"(?:CA|CC|CF|MM|DR|CP)"

# Tried in order; the first pattern that matches anywhere in the text wins.
# Separators are optional, so the first pattern also covers the tight form
# "2024CA001234" (a dedicated third pattern for it could never be reached).
_CASE_PATTERNS = [
    # [court-prefix-]year[-]type[-]seq, e.g. "27-2024-CA-001234"
    re.compile(
        r"(?:\d{2}-)?(?P<year>20\d{2})[\-\s]?" + _CASE_TYPE_ALT + r"[\-\s]?(?P<seq>\d{3,8})",
        re.IGNORECASE,
    ),
    # year[-]seq[-]type, e.g. "2024-001234-CA"
    re.compile(
        r"(?P<year>20\d{2})[\-\s]?(?P<seq>\d{3,8})[\-\s]?" + _CASE_TYPE_ALT,
        re.IGNORECASE,
    ),
]


def parse_case_number(case_number: str) -> Optional[tuple[str, str]]:
    """Parse a FL case_number into (year, sequence). Returns None if unparseable.

    The year must be 20xx and the court type one of CA, CC, CF, MM, DR, CP.
    Leading zeros are stripped from the sequence; an all-zero sequence
    becomes "0".

    Examples:
        "2024-CA-001234"    → ("2024", "1234")
        "23-2024-CA-001234" → ("2024", "1234")
        "2024CA001234"      → ("2024", "1234")
        "2024-001234-CA"    → ("2024", "1234")
    """
    if not case_number:
        return None
    text = case_number.strip().upper()
    for pattern in _CASE_PATTERNS:
        match = pattern.search(text)
        if match is None:
            continue
        # NOTE(review): zeros are dropped presumably because the OCRS
        # sequence field expects the bare number — confirm against the site.
        sequence = match.group("seq").lstrip("0") or "0"
        return (match.group("year"), sequence)
    return None
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# PUBLIC API
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _infer_status(results: list[dict], errors: list[str]) -> str:
    """Derive the contract status from the parsed rows and collected errors."""
    if not results:
        # Only adapter failures (crash/timeout) count as ERROR; site-side
        # validation messages with no rows are reported as NOT_FOUND.
        adapter_failed = any(
            "crashed" in e.lower() or "timeout" in e.lower() for e in errors
        )
        return "ERROR" if adapter_failed else "NOT_FOUND"

    first_type = (results[0].get("case_type") or "").upper()
    # Match the two-letter codes as whole letter runs so that words such as
    # "LOCAL" or "APPLICATION" are not mistaken for the "CA" court code.
    codes = set(re.findall(r"[A-Z]+", first_type))
    if "CA" in codes or "CIVIL" in first_type:
        return "FORECLOSURE_PENDING"  # CA cases include foreclosures
    if "CF" in codes or "FELONY" in first_type:
        return "CLEAN"  # unrelated criminal
    return "UNKNOWN"


def fetch_civitek_court_records(
    county_name: str,
    case_number: Optional[str] = None,
    party_lastname: Optional[str] = None,
    party_firstname: Optional[str] = None,
    business_name: Optional[str] = None,
    headless: bool = True,
    timeout_seconds: int = 45,
) -> dict:
    """Fetch court records from Civitek OCRS.

    Provide ONE of:
      - case_number (e.g., "2024-CA-001234") → fastest, most precise
      - party_lastname (with optional firstname) → person search
      - business_name → business search
    If several are given, case_number wins, then business_name, then
    party_lastname.

    Never raises for lookup failures: an unsupported county, an unparseable
    case number, a missing Playwright install or an unexpected page layout
    all produce a dict with status "ERROR".

    Returns dict matching court_records.py contract:
        {
          "status": "CLEAN" | "LIS_PENDENS_ACTIVE" | "FORECLOSURE_PENDING" |
                    "FORECLOSURE_COMPLETE" | "OWNER_VERIFIED" | "UNKNOWN" |
                    "NOT_FOUND" | "ERROR",
          "county": str (normalized),
          "case_number_searched": str,
          "search_method": "case_number" | "person_name" | "business_name",
          "results": list of dicts (raw cases found),
          "case_data": dict (top result enriched) | None,
          "lis_pendens": list,
          "liens_inventory": dict,
          "sources_used": ["civitek_ocrs"],
          "source_url": str,
          "errors": list of strings,
          "fetched_at": ISO timestamp,
        }
    This function itself only ever sets status to FORECLOSURE_PENDING, CLEAN,
    UNKNOWN, NOT_FOUND or ERROR, and always returns empty lis_pendens /
    liens_inventory.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-ISO-plus-"Z" string from an aware timestamp.
    fetched_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    county_normalized = (county_name or "").strip().replace(" County", "").replace(" county", "")

    # Validate county
    code = civitek_code_for(county_normalized)
    if not code:
        return _error_result(
            county=county_normalized,
            case_number_searched=case_number or "",
            error=f"County '{county_normalized}' not on Civitek platform. "
                  f"Supported: {sorted(CIVITEK_COUNTY_CODES.keys())[:10]}...",
            fetched_at=fetched_at,
        )

    # Validate at least one search criterion
    if not (case_number or party_lastname or business_name):
        return _error_result(
            county=county_normalized,
            case_number_searched="",
            error="Must provide one of: case_number, party_lastname, or business_name",
            fetched_at=fetched_at,
        )

    # Determine search method
    if case_number:
        parsed = parse_case_number(case_number)
        if not parsed:
            return _error_result(
                county=county_normalized,
                case_number_searched=case_number,
                error=f"Could not parse case_number '{case_number}' into year+sequence",
                fetched_at=fetched_at,
            )
        year, seq = parsed
        search_method = "case_number"
    elif business_name:
        year = seq = None
        search_method = "business_name"
    else:
        year = seq = None
        search_method = "person_name"

    # Playwright is imported lazily so the module can be imported without it.
    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
    except ImportError:
        return _error_result(
            county=county_normalized,
            case_number_searched=case_number or "",
            error="playwright not installed. Run: pip install playwright && playwright install chromium",
            fetched_at=fetched_at,
        )

    base_url = f"https://www.civitekflorida.com/ocrs/county/{code}/"
    errors: list[str] = []
    results: list[dict] = []
    final_url = base_url

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=headless)
            try:
                ctx = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120",
                )
                page = ctx.new_page()
                page.set_default_timeout(timeout_seconds * 1000)

                # Step 1: Entry page
                page.goto(base_url)
                page.wait_for_timeout(1500)

                # Step 2: Click Public
                page.locator("button:has-text('Public')").first.click()
                page.wait_for_timeout(2500)

                # Step 3: Click I Agree (disclaimer)
                agree_btn = page.locator("button:has-text('I Agree')").first
                if agree_btn.count() == 0:
                    return _error_result(
                        county_normalized,
                        case_number or "",
                        "Disclaimer page didn't show 'I Agree' button",
                        fetched_at,
                    )
                agree_btn.click()
                page.wait_for_timeout(2500)
                final_url = page.url

                # Step 4: Fill form based on search_method
                if search_method == "case_number":
                    # Switch to Case Search tab (data-index=1)
                    case_tab = page.locator("li[role='tab']:has-text('Case Search')").first
                    if case_tab.count() == 0:
                        # Without the tab the year/sequence fields cannot be
                        # used; fail fast rather than submit the wrong form.
                        return _error_result(
                            county_normalized,
                            case_number or "",
                            "Case Search tab not found",
                            fetched_at,
                        )
                    case_tab.click()
                    page.wait_for_timeout(1500)
                    page.fill("#form\\:search_tab\\:year", year)
                    page.fill("#form\\:search_tab\\:seq", seq)
                elif search_method == "person_name":
                    # Person Search tab is default (data-index=0)
                    page.fill("#form\\:search_tab\\:lastname", party_lastname)
                    if party_firstname:
                        page.fill("#form\\:search_tab\\:fname", party_firstname)
                elif search_method == "business_name":
                    page.fill("#form\\:search_tab\\:businessname", business_name)

                # Step 5: Submit (exact-text button first, loose match as fallback)
                search_btn = page.locator(
                    "button:has(.ui-button-text:text-is('Search'))"
                ).first
                if search_btn.count() == 0:
                    search_btn = page.locator("button:has-text('Search')").first
                search_btn.click()
                page.wait_for_timeout(6000)

                # Step 6: Capture validation errors (at most five, 200 chars each)
                for msg in page.locator(".ui-message-error, .ui-messages-error").all()[:5]:
                    try:
                        text = (msg.inner_text() or "").strip()[:200]
                    except Exception:
                        # Best effort: an unreadable message element must not
                        # abort the whole lookup.
                        continue
                    # Compare the stored (truncated) form so long messages
                    # are de-duplicated too.
                    if text and text not in errors:
                        errors.append(text)

                # Step 7: Parse results table
                results = _parse_results(page)
                final_url = page.url
            finally:
                # Close the browser on every exit path, including early
                # returns and exceptions.
                browser.close()
    except PWTimeout as e:
        errors.append(f"Playwright timeout: {e}")
    except Exception as e:
        # Top-level boundary: callers get an ERROR payload, not a traceback.
        errors.append(f"Playwright crashed: {type(e).__name__}: {e}")

    return {
        "status": _infer_status(results, errors),
        "county": county_normalized,
        "case_number_searched": case_number or "",
        "search_method": search_method,
        "results": results,
        # Top result doubles as the "enriched" case record.
        "case_data": results[0] if results else None,
        "lis_pendens": [],
        "liens_inventory": {},
        "sources_used": ["civitek_ocrs"],
        "source_url": final_url,
        "errors": errors,
        "fetched_at": fetched_at,
    }
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# Internal helpers
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Header keywords per output field, most specific first. Fields are resolved
# in this order and every column is claimed at most once, so the generic
# keyword "case" cannot take over a "Case Type" or "Case Status" column.
_RESULT_COLUMN_RULES = (
    ("status", ("case status", "status")),
    ("case_type", ("case type", "court type", "court", "type")),
    ("filed_date", ("filed", "file date", "date")),
    ("case_number", ("case number", "uniform", "case")),
    ("parties", ("party name", "name", "defendant", "plaintiff", "party", "parties")),
)

# A table is treated as the results grid when a header mentions any of these.
_RESULT_TABLE_HINTS = ("case", "uniform", "filed", "party", "type")


def _map_result_columns(headers: list[str]) -> dict[str, int]:
    """Return {field: column index} for a list of lower-cased header texts."""
    assigned: dict[str, int] = {}
    claimed: set[int] = set()
    for field, keywords in _RESULT_COLUMN_RULES:
        for keyword in keywords:
            hit = next(
                (i for i, h in enumerate(headers) if i not in claimed and keyword in h),
                None,
            )
            if hit is not None:
                assigned[field] = hit
                claimed.add(hit)
                break
    # Keep fields in on-screen column order.
    return dict(sorted(assigned.items(), key=lambda item: item[1]))


def _parse_results(page) -> list[dict]:
    """Parse the results table from a Civitek search results page.

    Scans every <table> on the page and uses the first one whose header row
    mentions a case-related keyword and that yields at least one data row.
    Each returned dict may contain case_number, case_type, filed_date,
    parties and status, depending on which headers were recognised.

    Args:
        page: Playwright page positioned on the search results.

    Returns:
        List of row dicts; empty when no matching table or rows are found.
        Errors while reading a table are swallowed (best effort).
    """
    for tbl in page.locator("table").all():
        table_rows: list[dict] = []
        try:
            rows = tbl.locator("tr")
            row_count = rows.count()
            if row_count < 2:
                continue  # needs a header row plus at least one data row

            headers = [
                (cell.inner_text() or "").strip().lower()
                for cell in rows.first.locator("th, td").all()
            ]
            if not any(hint in h for h in headers for hint in _RESULT_TABLE_HINTS):
                continue  # layout table, not the results grid

            col_idx = _map_result_columns(headers)

            for r in range(1, row_count):
                cells = rows.nth(r).locator("td").all()
                if not cells:
                    continue
                if len(cells) == 1 and len(headers) > 1:
                    # Single spanning cell in a multi-column table: PrimeFaces
                    # renders its empty-table message ("No records found.")
                    # this way, so it is not a data row.
                    continue
                cell_texts = [(c.inner_text() or "").strip() for c in cells]
                row_data = {
                    field: cell_texts[i]
                    for field, i in col_idx.items()
                    if i < len(cell_texts)
                }
                if row_data:
                    table_rows.append(row_data)
        except Exception:
            # Best effort: keep whatever was read from this table and fall
            # through; an unreadable table must not abort the whole parse.
            pass
        # Stop at the first table that produced rows so rows from different
        # tables are never mixed.
        if table_rows:
            return table_rows
    return []
|
||||
|
||||
|
||||
def _error_result(
    county: str,
    case_number_searched: str,
    error: str,
    fetched_at: str,
) -> dict:
    """Build the contract-shaped payload for a lookup that failed.

    Status is always "ERROR"; result collections are empty, search_method is
    None, source_url is blank, and `error` becomes the only entry in
    "errors". Fresh list/dict objects are created on every call.
    """
    failure = dict(
        status="ERROR",
        county=county,
        case_number_searched=case_number_searched,
        search_method=None,
        results=[],
        case_data=None,
        lis_pendens=[],
        liens_inventory={},
        sources_used=["civitek_ocrs"],
        source_url="",
        errors=[error],
        fetched_at=fetched_at,
    )
    return failure
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# CLI for manual testing
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one lookup from the command line and print the
    # resulting payload as indented JSON.
    import argparse
    import json

    cli = argparse.ArgumentParser(description="Civitek OCRS adapter manual test")
    cli.add_argument("--county", required=True, help="County name (e.g., Hernando)")
    # Optional search criteria, all plain string flags.
    for flag, help_text in (
        ("--case", "Case number (e.g., 2024-CA-001234)"),
        ("--last-name", "Last name for person search"),
        ("--first-name", "First name (optional with last-name)"),
        ("--business", "Business name search"),
    ):
        cli.add_argument(flag, help=help_text)
    cli.add_argument("--no-headless", action="store_true", help="Show browser window")
    opts = cli.parse_args()

    lookup_kwargs = {
        "county_name": opts.county,
        "case_number": opts.case,
        "party_lastname": opts.last_name,
        "party_firstname": opts.first_name,
        "business_name": opts.business,
        "headless": not opts.no_headless,
    }
    payload = fetch_civitek_court_records(**lookup_kwargs)
    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(payload, indent=2, default=str))
|
||||
Reference in New Issue
Block a user