"""court_records.py — Deterministic foreclosure detection via county clerk records. PROBLEMA QUE RESUELVE: El sistema heuristico (price_validator.py + property_value.py) HIPOTETIZA que un listing sospechosamente bajo es foreclosure. Para CONFIRMAR deterministicamente necesitamos consultar los court records publicos del condado: - Lis pendens (notice of foreclosure filing) - Code enforcement violations + liens - Tax delinquency ALCANCE WAVE 1.5A: - Solo DUVAL (Jacksonville) implementado en esta version - Otros condados: soft-fail con URL del clerk para lookup manual - Replicacion a Miami-Dade / Broward / Palm Beach / Hillsborough en versiones posteriores SI Duval funciona end-to-end. STACK: - Playwright headless Chromium (local, $0 por consulta) - Fallback a Firecrawl si Playwright falla (opcional, requiere ENABLE_FIRECRAWL=true) - User-Agent identificable + rate-limit 1 req/2s por dominio - Cache TTL 7 dias (los procesos judiciales se mueven lento) OPT-IN: ENABLE_COURT_RECORDS=true en .env """ from __future__ import annotations import os import re import time from datetime import datetime, timezone from pathlib import Path from typing import Optional # Rate limit por dominio (un request c/2s segun la regla) _DOMAIN_LAST_REQUEST: dict[str, float] = {} _RATE_LIMIT_SECONDS = 2.0 # User-Agent identificable (no spoof — somos un servicio legitimo) USER_AGENT = "AR-House/1.0 (real estate investment analysis; +https://ar-house.example/contact)" # Counties con scraper implementado en esta version SUPPORTED_COUNTIES = {"Duval", "duval"} # ═══════════════════════════════════════════════════════════════════════════ # Wave 1.5A v1.2: Plaintiff classification + Lien survival analysis # ═══════════════════════════════════════════════════════════════════════════ # Categorias de plaintiff (quien demanda el foreclosure) PLAINTIFF_TYPE_BANK_NATIONAL = "BANK_NATIONAL" # Wells Fargo, BofA, Chase, Citi, etc PLAINTIFF_TYPE_BANK_REGIONAL = "BANK_REGIONAL" # Truist, Regions, BB&T, Fifth Third PLAINTIFF_TYPE_CREDIT_UNION = "CREDIT_UNION" # Navy Federal, VyStar, etc PLAINTIFF_TYPE_NONBANK_MORTGAGE = "NONBANK_MORTGAGE" # Quicken/Rocket, PHH, Mr. Cooper, Carrington PLAINTIFF_TYPE_GSE = "GSE" # Fannie Mae, Freddie Mac, Ginnie Mae PLAINTIFF_TYPE_TRUSTEE = "TRUSTEE_MBS" # Deutsche Bank AS Trustee, US Bank NA Trustee (MBS trusts) PLAINTIFF_TYPE_IRS = "IRS_FEDERAL" # Internal Revenue Service (federal tax) PLAINTIFF_TYPE_STATE_TAX = "STATE_TAX" # FL Dept of Revenue PLAINTIFF_TYPE_HOA = "HOA_ASSOCIATION" # Homeowners / Condo association PLAINTIFF_TYPE_MUNICIPAL = "MUNICIPAL" # City/County code enforcement, utility liens PLAINTIFF_TYPE_HARD_MONEY = "HARD_MONEY_LENDER" # LLC nonbank, private high-rate lender PLAINTIFF_TYPE_PRIVATE = "PRIVATE_INDIVIDUAL" # Private investor (named person) PLAINTIFF_TYPE_OTHER = "OTHER" PLAINTIFF_TYPE_UNKNOWN = "UNKNOWN" # Mapeo de keywords del nombre del plaintiff a su categoria _BANK_NATIONAL_KEYWORDS = ( "WELLS FARGO", "BANK OF AMERICA", "CHASE", "JPMORGAN", "JP MORGAN", "CITIBANK", "CITI ", "CITI,", "U.S. BANK", "US BANK", "USBANK", "PNC BANK", "TD BANK", "HSBC", "CAPITAL ONE", ) _BANK_REGIONAL_KEYWORDS = ( "TRUIST", "REGIONS BANK", "BB&T", "BBT BANK", "FIFTH THIRD", "5/3 BANK", "SUNTRUST", "M&T BANK", "KEYBANK", "HUNTINGTON", ) _CREDIT_UNION_KEYWORDS = ("CREDIT UNION", "VYSTAR", "NAVY FEDERAL", "FCU", "C.U.") _NONBANK_MORTGAGE_KEYWORDS = ( "QUICKEN", "ROCKET MORTGAGE", "PHH MORTGAGE", "MR. COOPER", "MR COOPER", "NATIONSTAR", "CARRINGTON", "FREEDOM MORTGAGE", "LOANDEPOT", "PENNYMAC", "NEW REZ", "NEWREZ", "SHELLPOINT", "OCWEN", "DITECH", "BAYVIEW", "SPECIALIZED LOAN SERVICING", ) _GSE_KEYWORDS = ("FANNIE MAE", "FEDERAL NATIONAL MORTGAGE", "FNMA", "FREDDIE MAC", "FEDERAL HOME LOAN MORTGAGE", "FHLMC", "GINNIE MAE", "GNMA") _TRUSTEE_KEYWORDS = ("AS TRUSTEE", "AS INDENTURE TRUSTEE", "TRUSTEE FOR", "DEUTSCHE BANK NATIONAL", "BANK OF NEW YORK MELLON", "BNY MELLON", "WILMINGTON TRUST", "WILMINGTON SAVINGS") _IRS_KEYWORDS = ("INTERNAL REVENUE SERVICE", "I.R.S.", "U.S. INTERNAL REVENUE", "UNITED STATES OF AMERICA") _STATE_TAX_KEYWORDS = ("FLORIDA DEPARTMENT OF REVENUE", "FL DEPT OF REVENUE", "STATE OF FLORIDA") _HOA_KEYWORDS = ("HOMEOWNERS", "ASSOCIATION INC", "ASSOCIATION, INC", "CONDOMINIUM", "PROPERTY OWNERS ASSOCIATION", " POA ", "HOA") _MUNICIPAL_KEYWORDS = ("CITY OF ", "COUNTY OF ", "MUNICIPALITY", "CODE ENFORCEMENT", "TAX COLLECTOR") def classify_plaintiff(name: Optional[str]) -> dict: """Clasifica el plaintiff de un foreclosure por su nombre. Returns: { name: , type: PLAINTIFF_TYPE_*, category: "primary_lender" | "junior_lienholder" | "tax_authority" | "association" | "government" | "unknown", is_original_loan_holder: bool | None (None si no se puede determinar) } """ if not name: return { "name": None, "type": PLAINTIFF_TYPE_UNKNOWN, "category": "unknown", "is_original_loan_holder": None, } upper = name.upper() # Order matters: more specific patterns first (trustee MBS antes que bank national) if any(kw in upper for kw in _TRUSTEE_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_TRUSTEE, "category": "mbs_trustee", "is_original_loan_holder": False, "note": "MBS trustee: el loan fue securitizado. El servicer real puede ser otra entity."} if any(kw in upper for kw in _IRS_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_IRS, "category": "tax_authority", "is_original_loan_holder": False, "note": "IRS federal tax lien. SOBREVIVE el foreclosure con 120-day right of redemption."} if any(kw in upper for kw in _STATE_TAX_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_STATE_TAX, "category": "tax_authority", "is_original_loan_holder": False, "note": "FL state tax lien. Tipicamente extinguible pero verificar con title search."} if any(kw in upper for kw in _GSE_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_GSE, "category": "primary_lender", "is_original_loan_holder": False, "note": "GSE (Fannie/Freddie/Ginnie). Compraron el loan al originador. Comun en MLS post-foreclosure como REO."} if any(kw in upper for kw in _BANK_NATIONAL_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_BANK_NATIONAL, "category": "primary_lender", "is_original_loan_holder": True, "note": "Banco nacional grande. Probablemente originador del loan. Procesos estandarizados."} if any(kw in upper for kw in _BANK_REGIONAL_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_BANK_REGIONAL, "category": "primary_lender", "is_original_loan_holder": True, "note": "Banco regional. Mas flexible para negociar short sale o cash-for-keys."} if any(kw in upper for kw in _CREDIT_UNION_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_CREDIT_UNION, "category": "primary_lender", "is_original_loan_holder": True, "note": "Credit union. Members-only, foreclosure menos frecuente, mas dispuestos a workout."} if any(kw in upper for kw in _NONBANK_MORTGAGE_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_NONBANK_MORTGAGE, "category": "primary_lender", "is_original_loan_holder": False, "note": "Non-bank mortgage servicer. Suele ser servicer asignado, no el originador. Investor real es otro."} if any(kw in upper for kw in _HOA_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_HOA, "category": "association", "is_original_loan_holder": False, "note": "HOA/Condo association. FL Statute 720.3085(2)(b) limita lo que sobrevive a 12 meses dues o 1% del mortgage."} if any(kw in upper for kw in _MUNICIPAL_KEYWORDS): return {"name": name, "type": PLAINTIFF_TYPE_MUNICIPAL, "category": "government", "is_original_loan_holder": False, "note": "Municipal lien (code enforcement / utility). SOBREVIVE el foreclosure — corre con la tierra."} # LLC sin keyword conocido = probable hard money / private investor if "LLC" in upper or "L.L.C." in upper: return {"name": name, "type": PLAINTIFF_TYPE_HARD_MONEY, "category": "primary_lender", "is_original_loan_holder": True, "note": "LLC sin patron de banco/servicer conocido. Probable hard money lender o private investor. Tasas 8-15%, terminos cortos."} # Si el nombre tiene formato "APELLIDO, NOMBRE" o sin entity → individual if "," in name or (not any(s in upper for s in ("INC", "CORP", "BANK", "LLC", "TRUST")) and len(name.split()) <= 4): return {"name": name, "type": PLAINTIFF_TYPE_PRIVATE, "category": "primary_lender", "is_original_loan_holder": None, "note": "Individuo (no entity). Posible seller financing, family loan, o private money."} return {"name": name, "type": PLAINTIFF_TYPE_OTHER, "category": "unknown", "is_original_loan_holder": None, "note": "Nombre no matchea patrones conocidos. Revisar manualmente."} # ═══════════════════════════════════════════════════════════════════════════ # Lien types + survival analysis # ═══════════════════════════════════════════════════════════════════════════ # Tipos de lien (sigue convencion estandar US real estate) LIEN_TYPE_MORTGAGE_1ST = "MORTGAGE_1ST" LIEN_TYPE_MORTGAGE_2ND = "MORTGAGE_2ND" LIEN_TYPE_MORTGAGE_3RD = "MORTGAGE_3RD" LIEN_TYPE_HELOC = "HELOC" LIEN_TYPE_IRS_TAX = "IRS_TAX_LIEN" LIEN_TYPE_STATE_TAX = "STATE_TAX_LIEN" LIEN_TYPE_PROPERTY_TAX = "PROPERTY_TAX_LIEN" # County property tax delinquency LIEN_TYPE_HOA = "HOA_LIEN" LIEN_TYPE_MECHANICS = "MECHANICS_LIEN" LIEN_TYPE_MUNICIPAL = "MUNICIPAL_LIEN" # Code enforcement, utility LIEN_TYPE_JUDGMENT = "JUDGMENT_LIEN" LIEN_TYPE_OTHER = "OTHER" # Survival outcomes SURVIVES = "SURVIVES" EXTINGUISHED = "EXTINGUISHED" EXTINGUISHED_BY_THIS_ACTION = "EXTINGUISHED_BY_THIS_ACTION" # plaintiff's own mortgage UNCERTAIN = "UNCERTAIN" def analyze_lien_survival( *, lien_type: str, is_plaintiff_lien: bool = False, is_senior_to_plaintiff: bool = False, lien_filing_date: Optional[str] = None, plaintiff_filing_date: Optional[str] = None, ) -> dict: """Determina si un lien sobrevive un foreclosure judicial en Florida. Reglas FL standard: - Plaintiff's own mortgage lien: EXTINGUISHED_BY_THIS_ACTION - Liens senior (filed before) al plaintiff: SURVIVE (need to be paid off) - Liens junior (filed after) al plaintiff: EXTINGUISHED - IRS federal tax lien: SURVIVES with 120-day right of redemption (26 USC 7425(d)) - HOA dues: SURVIVE pero limitados a 12 mo o 1% del mortgage (FL 720.3085, 718.116) - Municipal/Code Enforcement: SURVIVES — runs with the land - Property tax: ALWAYS SURVIVES (super-priority) Returns: { survives_foreclosure: SURVIVES | EXTINGUISHED | EXTINGUISHED_BY_THIS_ACTION | UNCERTAIN, warning: str | None, legal_basis: str, } """ # Plaintiff's own mortgage gets wiped by the foreclosure that the plaintiff filed if is_plaintiff_lien: return { "survives_foreclosure": EXTINGUISHED_BY_THIS_ACTION, "warning": None, "legal_basis": "Plaintiff's own mortgage is the subject of this foreclosure — extinguished by judicial sale.", } # Super-priority liens (always survive regardless of filing order) if lien_type == LIEN_TYPE_PROPERTY_TAX: return { "survives_foreclosure": SURVIVES, "warning": "Property tax delinquency tiene SUPER-PRIORIDAD. Sobrevive a TODO. Pagar inmediatamente post-cierre o el tax collector vende el property por tax deed.", "legal_basis": "FL Statute 197.122 — ad valorem taxes constitute first lien superior to all other liens.", } if lien_type == LIEN_TYPE_IRS_TAX: return { "survives_foreclosure": SURVIVES, "warning": "IRS federal tax lien SOBREVIVE el foreclosure. El IRS tiene 120-day right of redemption (puede recomprar el property pagando el bid + costos). Despues de 120 dias, el buyer queda dueno definitivamente. Sumar al MAB.", "legal_basis": "26 USC 7425(d) — federal tax liens survive judicial sale with 120-day redemption period.", } if lien_type == LIEN_TYPE_MUNICIPAL: return { "survives_foreclosure": SURVIVES, "warning": "Lien municipal (code enforcement / utility) SOBREVIVE — corre con la tierra. Sumar al MAB. Check code enforcement violations open antes de bidear.", "legal_basis": "FL Statute 162.09 — code enforcement liens equivalent to civil judgment, runs with land.", } if lien_type == LIEN_TYPE_HOA: return { "survives_foreclosure": SURVIVES, "warning": "HOA dues SOBREVIVE pero limitado por FL Statute 720.3085(2)(b): el nuevo owner debe 12 meses de dues o 1% del original mortgage, lo que sea menor. Si es condo: FL 718.116. Pedir HOA estoppel letter pre-bid.", "legal_basis": "FL Statute 720.3085(2)(b) (HOA) o 718.116 (condo) — buyer hereda capped portion.", } if lien_type == LIEN_TYPE_STATE_TAX: # State tax liens vary by chronology if is_senior_to_plaintiff: return { "survives_foreclosure": SURVIVES, "warning": "State tax lien filed BEFORE plaintiff's mortgage → sobrevive.", "legal_basis": "FL Statute 197.0 — chronological priority among non-super-priority liens.", } return { "survives_foreclosure": EXTINGUISHED, "warning": "State tax lien filed AFTER plaintiff's mortgage típicamente se extingue. Validar con title search.", "legal_basis": "Junior liens (post-mortgage) extinguished by foreclosure of senior lien.", } # Standard junior/senior analysis for mortgages, mechanics, judgment liens if lien_type in (LIEN_TYPE_MORTGAGE_2ND, LIEN_TYPE_MORTGAGE_3RD, LIEN_TYPE_HELOC, LIEN_TYPE_MECHANICS, LIEN_TYPE_JUDGMENT): if is_senior_to_plaintiff: return { "survives_foreclosure": SURVIVES, "warning": f"{lien_type} filed BEFORE plaintiff's mortgage → sobrevive. Sumar al MAB.", "legal_basis": "Senior lien sobrevive foreclosure de lien junior.", } return { "survives_foreclosure": EXTINGUISHED, "warning": None, "legal_basis": f"{lien_type} junior al plaintiff's mortgage — extinguido por foreclosure judicial.", } # 1st mortgage (no es del plaintiff) — significa que hay otra hipoteca senior if lien_type == LIEN_TYPE_MORTGAGE_1ST: if is_senior_to_plaintiff: return { "survives_foreclosure": SURVIVES, "warning": "Existe un mortgage senior al plaintiff's lien. Buyer hereda ESTA hipoteca. Sumar saldo al MAB.", "legal_basis": "Senior mortgage survives foreclosure of junior lien.", } return { "survives_foreclosure": EXTINGUISHED, "warning": None, "legal_basis": "1st mortgage junior al plaintiff (raro pero posible si plaintiff es property tax/super-priority).", } # Fallback return { "survives_foreclosure": UNCERTAIN, "warning": f"Tipo de lien '{lien_type}' no tiene regla automatica. Title search profesional ($300-500) requerido.", "legal_basis": "Default safety: assume UNCERTAIN para tipos no clasificados.", } def _empty_liens_inventory(reason: str) -> dict: """Estructura vacía estandar para liens cuando v1.1 scraper no esta listo. Mantiene el shape del output asi los downstream consumers no rompen. """ return { "all_liens": [], "lien_count": 0, "total_surviving_debt": 0, "investor_warning": ( "⚠️ Liens detail no disponible automaticamente (Wave 1.5A v1.1 deferred a Phase 3.5). " "ANTES de cualquier oferta o bid: hacer **title search profesional** " "($300-500 USD) en or.duvalclerk.com filtering por document types: " "MTG (mortgages), NFTL (IRS federal tax liens), SATL (state tax), " "NOC (mechanics), COD (code enforcement). Listing puede tener hasta " "$30K-$100K en deudas heredables NO visibles aqui." ), "detail_status": "PENDING_V1_1", "detail_pending_reason": reason, } # URL del clerk por condado (para soft-fail mensajes en condados no implementados) COUNTY_CLERK_URLS = { "Duval": "https://www.duvalclerk.com/online-option/court-records", "Miami-Dade": "https://www2.miami-dadeclerk.com/ocs", "Broward": "https://officialrecords.broward.org", "Palm Beach": "https://www.mypalmbeachclerk.com/departments/records-services-division", "Hillsborough": "https://hillsclerk.com", "Orange": "https://myorangeclerk.com", } def _enable_court_records() -> bool: return os.getenv("ENABLE_COURT_RECORDS", "false").lower() == "true" def _rate_limit(domain: str) -> None: """Bloquea si llamamos al mismo dominio < 2s atras.""" now = time.time() last = _DOMAIN_LAST_REQUEST.get(domain, 0) delta = now - last if delta < _RATE_LIMIT_SECONDS: time.sleep(_RATE_LIMIT_SECONDS - delta) _DOMAIN_LAST_REQUEST[domain] = time.time() # ═══════════════════════════════════════════════════════════════════════════ # Duval (Jacksonville) — implementacion completa # ═══════════════════════════════════════════════════════════════════════════ # Diccionarios para parseo de address (Duval Property Appraiser tiene 5 campos) _STREET_DIRECTIONS = {"N", "S", "E", "W", "NE", "NW", "SE", "SW", "NORTH", "SOUTH", "EAST", "WEST"} _STREET_SUFFIXES = { "ST": "ST", "STREET": "ST", "AVE": "AVE", "AVENUE": "AVE", "RD": "RD", "ROAD": "RD", "BLVD": "BLVD", "BOULEVARD": "BLVD", "LN": "LN", "LANE": "LN", "WAY": "WAY", "DR": "DR", "DRIVE": "DR", "CT": "CT", "COURT": "CT", "PL": "PL", "PLACE": "PL", "CIR": "CIR", "CIRCLE": "CIR", "TER": "TER", "TERRACE": "TER", "PKWY": "PKWY", "PARKWAY": "PKWY", "HWY": "HWY", "HIGHWAY": "HWY", "TRL": "TRL", "TRAIL": "TRL", "XING": "XING", "CROSSING": "XING", "ALY": "ALY", "ALLEY": "ALY", "BND": "BND", "BEND": "BND", } def _parse_address_duval(address: str) -> Optional[dict]: """Parse "3245 N Pearl St, Jacksonville, FL 32206" into Duval form fields. Returns: {street_num: "3245", prefix: "N", name: "PEARL", suffix: "ST", zip: "32206"} o None si no se pudo parsear. """ # Strip post-comma (city/state) y aislar street part street_part = address.split(",")[0].strip() # Capturar ZIP del original si lo hay zip_match = re.search(r"\b(\d{5})(?:-\d{4})?\b", address) zip_code = zip_match.group(1) if zip_match else "" tokens = street_part.upper().split() if len(tokens) < 2 or not tokens[0].isdigit(): return None street_num = tokens[0] rest = tokens[1:] # Detectar prefix (direction) en el primer token despues del numero prefix = "" if rest and rest[0] in _STREET_DIRECTIONS: # Normalize NORTH→N etc prefix_raw = rest.pop(0) prefix_map = {"NORTH": "N", "SOUTH": "S", "EAST": "E", "WEST": "W"} prefix = prefix_map.get(prefix_raw, prefix_raw) # Detectar suffix en el ultimo token suffix = "" if rest and rest[-1] in _STREET_SUFFIXES: suffix = _STREET_SUFFIXES[rest.pop()] name = " ".join(rest) if not name: return None return { "street_num": street_num, "prefix": prefix, "name": name, "suffix": suffix, "zip": zip_code, } def _fetch_property_owner_duval(address: str) -> tuple[Optional[dict], list[str]]: """Step 1 Duval: dado un address, buscar el owner name + RE# en el Property Appraiser. Sitio: https://paopropertysearch.coj.net/Basic/Search.aspx Form fields (ASP.NET, descubiertos via DOM inspection): - ctl00$cphBody$tbStreetNumber → numero - ctl00$cphBody$ddStreetPrefix → select (N/S/E/W) - ctl00$cphBody$tbStreetName → nombre (sin prefix ni suffix) - ctl00$cphBody$ddStreetSuffix → select (ST/AVE/RD/...) - ctl00$cphBody$tbZipCode → ZIP opcional - ctl00$cphBody$bSearch → submit Resultado en Results.aspx (puede ser direct match o tabla). Returns ({owner_name, re_number, year_built, tax_assessed, last_sale_date}, errors) """ errors: list[str] = [] try: from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout except ImportError as e: errors.append(f"playwright no instalado: {e}") return None, errors parsed = _parse_address_duval(address) if not parsed: errors.append(f"No pude parsear el address (formato esperado: ' [prefix] [suffix]'): {address}") return None, errors _rate_limit("paopropertysearch.coj.net") try: with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context(user_agent=USER_AGENT) page = context.new_page() page.set_default_timeout(15_000) # IMPORTANTE: wait_until='networkidle' (no 'domcontentloaded') porque el # sitio Duval usa ASP.NET WebForms con WebForm_DoPostBackWithOptions, una # funcion JS que se carga DESPUES del DOMContentLoaded. Si clickeamos # Search antes que esa JS exista, el click no submite nada (PAGEERROR). page.goto("https://paopropertysearch.coj.net/Basic/Search.aspx", wait_until="networkidle", timeout=20_000) # Nota: el sitio Duval ASP.NET no carga WebForm_DoPostBackWithOptions # (probablemente por su compat IE8 mode). No esperamos a esa funcion — # vamos directo al fallback de form.submit() que funciona consistentemente. # Llenar form fields con los selectores REALES del sitio Duval ASP.NET page.locator("#ctl00_cphBody_tbStreetNumber").fill(parsed["street_num"]) if parsed["prefix"]: try: page.locator("#ctl00_cphBody_ddStreetPrefix").select_option( value=parsed["prefix"] ) except Exception: # Fallback: por label try: page.locator("#ctl00_cphBody_ddStreetPrefix").select_option( label=parsed["prefix"] ) except Exception: pass # Si no matchea, dejar empty y confiar en street name match page.locator("#ctl00_cphBody_tbStreetName").fill(parsed["name"]) if parsed["suffix"]: try: page.locator("#ctl00_cphBody_ddStreetSuffix").select_option( value=parsed["suffix"] ) except Exception: try: page.locator("#ctl00_cphBody_ddStreetSuffix").select_option( label=parsed["suffix"] ) except Exception: pass if parsed["zip"]: try: page.locator("#ctl00_cphBody_tbZipCode").fill(parsed["zip"]) except Exception: pass # Submit via JS form.submit() — bypasea WebForm_DoPostBackWithOptions # que el sitio Duval no carga correctamente. submitted = False try: page.evaluate("""() => { const form = document.forms[0] || document.querySelector('form'); if (!form) throw new Error('no form found'); form.action = 'Results.aspx'; // ASP.NET espera el button name como input para detectar el click let hidden = document.createElement('input'); hidden.type = 'hidden'; hidden.name = 'ctl00$cphBody$bSearch'; hidden.value = 'Search'; form.appendChild(hidden); form.submit(); }""") page.wait_for_url("**Results.aspx**", timeout=10_000) submitted = True except Exception as e: errors.append(f"Property Appraiser: form submit fallo: {e}") try: page.wait_for_load_state("networkidle", timeout=10_000) except PlaywrightTimeout: pass current_url = page.url if not submitted: errors.append( f"Property Appraiser: submit no navego a Results.aspx (URL final: {current_url}). " "ASP.NET WebForms postback fallo en las 3 estrategias (click/Enter/JS-eval)." ) # Parse Results.aspx — la pagina tiene una tabla con columnas conocidas: # RE #, Name (Last First), Street #, Street Name, Type, Direction, Unit, City, Zip # Si "No Results Found" en body → property no existe en Duval PA body_text = page.locator("body").inner_text() if submitted else "" owner_name: Optional[str] = None re_number: Optional[str] = None year_built: Optional[int] = None tax_assessed: Optional[int] = None last_sale_date: Optional[str] = None no_results = "No Results Found" in body_text or "No information available" in body_text if no_results: errors.append( f"Property Appraiser: 'No Results Found' para " f"{parsed['street_num']} {parsed['prefix']} {parsed['name']} {parsed['suffix']}. " "Address probablemente no existe en Duval PA database o esta fuera del condado." ) elif submitted: # Parsear tabla de resultados via DOM try: results_table = page.locator("table").first rows = results_table.locator("tr").all() if len(rows) >= 2: # Row 0 = headers, Row 1+ = data # Buscar la fila que matchee el street # exacto si tenemos varios resultados best_row = None for r in rows[1:]: cells = [(c.text_content() or "").strip() for c in r.locator("td").all()] if len(cells) >= 9 and cells[2] == parsed["street_num"]: # Match exacto del street # best_row = cells break if not best_row: # Sin match exacto, tomar el primero cells_first = [(c.text_content() or "").strip() for c in rows[1].locator("td").all()] if len(cells_first) >= 9: best_row = cells_first if best_row: re_number = best_row[0] or None owner_name = best_row[1] or None # Otros campos en el detail page (TODO si necesario) except Exception as e: errors.append(f"Property Appraiser: error parseando tabla de resultados: {e}") browser.close() if not owner_name and not re_number: return None, errors return { "owner_name": owner_name, "re_number": re_number, "year_built": year_built, "tax_assessed_value": tax_assessed, "last_sale_date": last_sale_date, "source": "Duval Property Appraiser (paopropertysearch.coj.net)", "result_url": current_url, }, errors except Exception as e: errors.append(f"Property Appraiser Duval scrape error: {e}") return None, errors def _fetch_lis_pendens_duval(owner_name: str, address: str) -> tuple[list[dict], list[str]]: """Step 2 Duval: dado un owner name, buscar lis pendens recientes en Official Records. Sitio: https://or.duvalclerk.com/ Flujo (descubierto via DOM inspection): 1. Landing tiene Disclaimer form. Click "I accept the conditions above" (id='btnButton'). 2. Despues del POST, redirige a la home autenticada-as-guest. 3. Navegar a /search/SearchTypeName (link directo). 4. Llenar Last Name + First Name. 5. Submit. Resultados muestran columns: Doc Type, Recording Date, Grantor, Grantee. 6. Filter por Doc Type que contenga "LIS PENDENS" o codigo "LP". Owner name del Property Appraiser viene como "JONES JOHN N" o "CITY OF JACKSONVILLE" — formato "LASTNAME FIRSTNAME MIDDLE_INITIAL". Returns (list of {case_number, filing_date, plaintiff, doc_type, instrument_num}, errors_list) """ errors: list[str] = [] matches: list[dict] = [] try: from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout except ImportError as e: errors.append(f"playwright no instalado: {e}") return matches, errors _rate_limit("or.duvalclerk.com") try: with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context(user_agent=USER_AGENT) page = context.new_page() page.set_default_timeout(15_000) # Step 1: Aceptar disclaimer page.goto("https://or.duvalclerk.com/", wait_until="networkidle", timeout=20_000) try: # El boton tiene id='btnButton' (descubierto via DOM inspection) page.locator("#btnButton").click() page.wait_for_load_state("networkidle", timeout=10_000) except Exception as e: errors.append(f"Official Records: error aceptando disclaimer: {e}") browser.close() return matches, errors # Step 2: Navegar al Name Search try: page.goto("https://or.duvalclerk.com/search/SearchTypeName", wait_until="networkidle", timeout=15_000) except Exception as e: errors.append(f"Official Records: no pude navegar a SearchTypeName: {e}") browser.close() return matches, errors # Step 3: Llenar form de busqueda por nombre # Owner name del Duval PA viene como "LASTNAME FIRSTNAME M" o "LASTNAME LASTNAME2 FIRSTNAME" # Algunos casos especiales: "CITY OF JACKSONVILLE" (no es persona) # "JONES JOHN N" → last=JONES, first=JOHN, middle=N parts = owner_name.strip().split() last_name = parts[0] if parts else owner_name first_name = parts[1] if len(parts) > 1 else "" # Dump form structure if first time (errors=[] for debug) # Selectores tipicos para nombre en este sistema (Acclaim Land Records) ln_selectors = [ "input[name='lastName']", "input[id='lastName']", "input[name='LastName']", "input[id='LastName']", "input[name*='last']", "input[id*='last']", ] fn_selectors = [ "input[name='firstName']", "input[id='firstName']", "input[name='FirstName']", "input[id='FirstName']", "input[name*='first']", "input[id*='first']", ] ln_filled = False for sel in ln_selectors: if page.locator(sel).count() > 0: page.locator(sel).first.fill(last_name) ln_filled = True break if not ln_filled: # v1: degradar gracefully — Acclaim Land Records system tiene form # dinamico (JS-rendered) que requiere mas iteracion. v1.1 lo arregla. errors.append( "Official Records v1: Name Search form selectors no encontrados. " "Lis pendens lookup automatico no disponible en este release. " "Lookup manual: https://or.duvalclerk.com/search/SearchTypeName " f"con last_name='{last_name}', first_name='{first_name}'" ) browser.close() return matches, errors for sel in fn_selectors: if page.locator(sel).count() > 0 and first_name: page.locator(sel).first.fill(first_name) break # Submit search_btns = [ "input[type='submit'][value*='Search']", "button:has-text('Search')", "input[type='button'][value*='Search']", "#searchButton", "#btnSearch", "button[type='submit']", ] clicked = False for sel in search_btns: try: if page.locator(sel).count() > 0: page.locator(sel).first.click() clicked = True break except Exception: pass if not clicked: # Fallback: Enter en el lastName input try: for sel in ln_selectors: if page.locator(sel).count() > 0: page.locator(sel).first.press("Enter") clicked = True break except Exception: pass if not clicked: errors.append("Official Records: no encontre boton Search ni pude enviar via Enter") browser.close() return matches, errors try: page.wait_for_load_state("networkidle", timeout=15_000) except PlaywrightTimeout: pass current_url = page.url # Step 4: Parsear resultados. La tabla del sistema Acclaim tiene columnas # tipicas: Doc Type, Recording Date, Party, Instrument #, Book/Page # Buscamos rows con doc type que contenga "LIS PENDENS" try: # Esperar a que cargue la grid de resultados page.wait_for_selector("table", timeout=8_000) except PlaywrightTimeout: errors.append(f"Official Records: tabla de resultados no apareció. URL: {current_url}") browser.close() return matches, errors # Buscar todas las filas de la tabla y filter por LIS PENDENS / LP all_rows = page.locator("table tr").all() lp_keywords = ["LIS PENDENS", "LISPENDENS", " LP ", "FORECLOSURE"] for row in all_rows[1:]: # skip header try: cells = [(c.text_content() or "").strip() for c in row.locator("td").all()] row_text = " ".join(cells).upper() if any(kw in row_text for kw in lp_keywords): # Parsear filas que matchean match = { "doc_type": next((c for c in cells if any(kw.strip() in c.upper() for kw in lp_keywords)), "Lis Pendens"), "all_columns_text": cells, "source_url": current_url, } # Intentar extraer fecha for c in cells: date_m = re.search(r"\d{1,2}/\d{1,2}/\d{4}", c) if date_m: match["filing_date"] = date_m.group(0) break # Intentar extraer instrument # for c in cells: inst_m = re.match(r"\d{4,}-\d{4,}", c) or re.match(r"\d{8,}", c) if inst_m: match["instrument_number"] = inst_m.group(0) break matches.append(match) except Exception: pass if not matches: # Sin matches NO es error — la propiedad puede estar limpia errors.append( f"Sin matches de Lis Pendens para owner '{owner_name}' en Duval Official Records. " f"Esto puede significar: (a) la propiedad NO esta en foreclosure, o " f"(b) el owner_name parseado no matchea el formato del clerk. URL final: {current_url}" ) browser.close() return matches, errors except Exception as e: errors.append(f"Official Records Duval scrape error: {e}") return matches, errors def _fetch_duval(address: str) -> dict: """Pipeline completo Duval: owner lookup + lis pendens search + liens inventory. v1: solo Property Appraiser funciona. Lis pendens scraper devuelve estructura vacia con warning. Liens inventory tambien vacia + warning. v1.1 (deferred a Phase 3.5): popular lis_pendens + all_liens reales. """ errors: list[str] = [] sources_used: list[str] = [] # Step 1: owner name from Property Appraiser owner_data, owner_errors = _fetch_property_owner_duval(address) errors.extend(owner_errors) if owner_data: sources_used.append(owner_data.get("source", "Duval Property Appraiser")) # Step 2: lis pendens lookup (solo si tenemos owner_name) lp_matches = [] if owner_data and owner_data.get("owner_name"): lp_matches, lp_errors = _fetch_lis_pendens_duval( owner_data["owner_name"], address ) errors.extend(lp_errors) if not lp_errors or "Sin matches" in (lp_errors[0] if lp_errors else ""): sources_used.append("Duval Official Records (or.duvalclerk.com)") # Step 3 (Wave 1.5A v1.2): Liens inventory — DEFERRED a v1.1, devolver placeholder # Cuando v1.1 funcione: _fetch_liens_duval(owner_data['owner_name'], owner_data['re_number']) # devolvera la lista completa de liens via doc_type filters en or.duvalclerk.com. liens_data = _empty_liens_inventory( reason="Acclaim Land Records scraper deferred to v1.1. Lookup manual disponible." ) # Step 4: Plaintiff classification (solo si hay lis pendens detectado) plaintiff_info = None if lp_matches: # En v1.1 cuando se parsee correctamente, el lis pendens row tendra columns # con plaintiff name. Por ahora, intentar extraer del primer match si esta. first_lp = lp_matches[0] if lp_matches else {} plaintiff_name_raw = ( first_lp.get("plaintiff") or (first_lp.get("all_columns_text") or [None])[0] ) plaintiff_info = classify_plaintiff(plaintiff_name_raw) # Status determination # v1: si el Lis Pendens lookup degrada gracefully, status='OWNER_VERIFIED' # (sabemos que la propiedad existe + owner, pero NO podemos confirmar foreclosure). # En v1.1 cuando Official Records funcione completamente, podra subir a 'CLEAN'. lis_pendens_degraded = any( "Official Records v1" in e for e in errors ) if lp_matches: status = "LIS_PENDENS_ACTIVE" most_recent = sorted( [m for m in lp_matches if m.get("filing_date")], key=lambda m: m["filing_date"], reverse=True ) most_recent_date = most_recent[0]["filing_date"] if most_recent else None elif owner_data: # Tenemos owner pero no pudimos verificar foreclosures if lis_pendens_degraded: status = "OWNER_VERIFIED" # PA OK, lis pendens manual else: status = "CLEAN" # Both PA + lis pendens lookups OK, no matches most_recent_date = None else: status = "UNKNOWN" most_recent_date = None # Pull case_number from first lis_pendens match if available case_number = None if lp_matches: first_lp = lp_matches[0] case_number = ( first_lp.get("case_number") or first_lp.get("instrument_number") ) return { "status": status, "county": "Duval", "address": address, # Property Appraiser data "owner_name": (owner_data or {}).get("owner_name"), "re_number": (owner_data or {}).get("re_number"), "tax_assessed_value": (owner_data or {}).get("tax_assessed_value"), "year_built_official": (owner_data or {}).get("year_built"), "last_sale_date": (owner_data or {}).get("last_sale_date"), # Lis pendens detail "lis_pendens": lp_matches, "lis_pendens_count": len(lp_matches), "most_recent_lis_pendens_date": most_recent_date, "case_number": case_number, # Wave 1.5A v1.2: Plaintiff + liens structured fields "plaintiff": plaintiff_info, "loan_origin": None, # v1.1 popula desde MTG records cuando funcione "all_liens": liens_data["all_liens"], "lien_count": liens_data["lien_count"], "total_surviving_debt": liens_data["total_surviving_debt"], "investor_warning": liens_data["investor_warning"], "liens_detail_status": liens_data["detail_status"], # Meta "sources_used": sources_used, "errors": errors, "fetched_at": datetime.now(timezone.utc).isoformat(), } # ═══════════════════════════════════════════════════════════════════════════ # Public API # ═══════════════════════════════════════════════════════════════════════════ def fetch_court_records( *, address: str, county_name: Optional[str] = None, ) -> dict: """Entry point. Dispatches per county. Soft-fail si condado no implementado. Returns dict con: status: CLEAN | LIS_PENDENS_ACTIVE | CODE_VIOLATIONS | TAX_DELINQUENT | UNKNOWN | NOT_IMPLEMENTED | DISABLED county owner_name, re_number, tax_assessed_value, year_built_official, last_sale_date lis_pendens: list of cases (si LIS_PENDENS_ACTIVE) sources_used, errors, fetched_at """ fetched_at = datetime.now(timezone.utc).isoformat() if not _enable_court_records(): return { "status": "DISABLED", "county": county_name, "address": address, "recommendation": ( "Court records scraping deshabilitado. Activar ENABLE_COURT_RECORDS=true " "en .env para deteccion deterministica de foreclosure / lis pendens." ), "sources_used": [], "errors": [], "fetched_at": fetched_at, } # Normalize county cn = (county_name or "").strip() cn_normalized = cn.replace(" County", "").replace(" county", "").strip() if cn_normalized.lower() == "duval": return _fetch_duval(address) # Soft-fail para condados no implementados clerk_url = COUNTY_CLERK_URLS.get(cn_normalized, "https://www.flclerks.com/") return { "status": "NOT_IMPLEMENTED", "county": cn_normalized, "address": address, "recommendation": ( f"Court records scraper no implementado para {cn_normalized} todavia. " f"Lookup manual en {clerk_url}. Wave 1.5A v1 cubre solo Duval; " "Miami-Dade / Broward / Palm Beach / Hillsborough en versiones posteriores." ), "clerk_url": clerk_url, "sources_used": [], "errors": [], "fetched_at": fetched_at, }