"""court_records.py — Deterministic foreclosure detection via county clerk records.
PROBLEMA QUE RESUELVE:
El sistema heuristico (price_validator.py + property_value.py) HIPOTETIZA que un
listing sospechosamente bajo es foreclosure. Para CONFIRMAR deterministicamente
necesitamos consultar los court records publicos del condado:
- Lis pendens (notice of foreclosure filing)
- Code enforcement violations + liens
- Tax delinquency
ALCANCE WAVE 1.5A:
- Solo DUVAL (Jacksonville) implementado en esta version
- Otros condados: soft-fail con URL del clerk para lookup manual
- Replicacion a Miami-Dade / Broward / Palm Beach / Hillsborough en versiones
posteriores SI Duval funciona end-to-end.
STACK:
- Playwright headless Chromium (local, $0 por consulta)
- Fallback a Firecrawl si Playwright falla (opcional, requiere ENABLE_FIRECRAWL=true)
- User-Agent identificable + rate-limit 1 req/2s por dominio
- Cache TTL 7 dias (los procesos judiciales se mueven lento)
OPT-IN:
ENABLE_COURT_RECORDS=true en .env
"""
from __future__ import annotations
import os
import re
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
# Per-domain rate limiting (one request every 2 seconds, per the scraping rule).
# Maps a domain name to the timestamp stored by _rate_limit() for its last request.
_DOMAIN_LAST_REQUEST: dict[str, float] = {}
_RATE_LIMIT_SECONDS = 2.0
# Identifiable User-Agent (no spoofing — we are a legitimate service).
USER_AGENT = "AR-House/1.0 (real estate investment analysis; +https://ar-house.example/contact)"
# Counties that have a scraper implemented in this version.
SUPPORTED_COUNTIES = {"Duval", "duval"}
# ═══════════════════════════════════════════════════════════════════════════
# Wave 1.5A v1.2: Plaintiff classification + Lien survival analysis
# ═══════════════════════════════════════════════════════════════════════════
# Plaintiff categories (who is suing to foreclose).
PLAINTIFF_TYPE_BANK_NATIONAL = "BANK_NATIONAL"  # Wells Fargo, BofA, Chase, Citi, etc.
PLAINTIFF_TYPE_BANK_REGIONAL = "BANK_REGIONAL"  # Truist, Regions, BB&T, Fifth Third
PLAINTIFF_TYPE_CREDIT_UNION = "CREDIT_UNION"  # Navy Federal, VyStar, etc.
PLAINTIFF_TYPE_NONBANK_MORTGAGE = "NONBANK_MORTGAGE"  # Quicken/Rocket, PHH, Mr. Cooper, Carrington
PLAINTIFF_TYPE_GSE = "GSE"  # Fannie Mae, Freddie Mac, Ginnie Mae
PLAINTIFF_TYPE_TRUSTEE = "TRUSTEE_MBS"  # Deutsche Bank AS Trustee, US Bank NA Trustee (MBS trusts)
PLAINTIFF_TYPE_IRS = "IRS_FEDERAL"  # Internal Revenue Service (federal tax)
PLAINTIFF_TYPE_STATE_TAX = "STATE_TAX"  # FL Dept of Revenue
PLAINTIFF_TYPE_HOA = "HOA_ASSOCIATION"  # Homeowners / Condo association
PLAINTIFF_TYPE_MUNICIPAL = "MUNICIPAL"  # City/County code enforcement, utility liens
PLAINTIFF_TYPE_HARD_MONEY = "HARD_MONEY_LENDER"  # LLC nonbank, private high-rate lender
PLAINTIFF_TYPE_PRIVATE = "PRIVATE_INDIVIDUAL"  # Private investor (named person)
PLAINTIFF_TYPE_OTHER = "OTHER"
PLAINTIFF_TYPE_UNKNOWN = "UNKNOWN"

# Keywords in the plaintiff name that map it to a category.
_BANK_NATIONAL_KEYWORDS = (
    "WELLS FARGO", "BANK OF AMERICA", "CHASE", "JPMORGAN", "JP MORGAN",
    "CITIBANK", "CITI ", "CITI,", "U.S. BANK", "US BANK", "USBANK",
    "PNC BANK", "TD BANK", "HSBC", "CAPITAL ONE",
)
_BANK_REGIONAL_KEYWORDS = (
    "TRUIST", "REGIONS BANK", "BB&T", "BBT BANK", "FIFTH THIRD", "5/3 BANK",
    "SUNTRUST", "M&T BANK", "KEYBANK", "HUNTINGTON",
)
_CREDIT_UNION_KEYWORDS = ("CREDIT UNION", "VYSTAR", "NAVY FEDERAL", "FCU", "C.U.")
_NONBANK_MORTGAGE_KEYWORDS = (
    "QUICKEN", "ROCKET MORTGAGE", "PHH MORTGAGE", "MR. COOPER", "MR COOPER",
    "NATIONSTAR", "CARRINGTON", "FREEDOM MORTGAGE", "LOANDEPOT",
    "PENNYMAC", "NEW REZ", "NEWREZ", "SHELLPOINT", "OCWEN", "DITECH",
    "BAYVIEW", "SPECIALIZED LOAN SERVICING",
)
_GSE_KEYWORDS = ("FANNIE MAE", "FEDERAL NATIONAL MORTGAGE", "FNMA",
                 "FREDDIE MAC", "FEDERAL HOME LOAN MORTGAGE", "FHLMC",
                 "GINNIE MAE", "GNMA")
_TRUSTEE_KEYWORDS = ("AS TRUSTEE", "AS INDENTURE TRUSTEE", "TRUSTEE FOR",
                     "DEUTSCHE BANK NATIONAL", "BANK OF NEW YORK MELLON", "BNY MELLON",
                     "WILMINGTON TRUST", "WILMINGTON SAVINGS")
_IRS_KEYWORDS = ("INTERNAL REVENUE SERVICE", "I.R.S.", "U.S. INTERNAL REVENUE",
                 "UNITED STATES OF AMERICA")
_STATE_TAX_KEYWORDS = ("FLORIDA DEPARTMENT OF REVENUE", "FL DEPT OF REVENUE",
                       "STATE OF FLORIDA")
_HOA_KEYWORDS = ("HOMEOWNERS", "ASSOCIATION INC", "ASSOCIATION, INC", "CONDOMINIUM",
                 "PROPERTY OWNERS ASSOCIATION", " POA ", "HOA")
_MUNICIPAL_KEYWORDS = ("CITY OF ", "COUNTY OF ", "MUNICIPALITY", "CODE ENFORCEMENT",
                       "TAX COLLECTOR")


def _compile_keywords(keywords: tuple) -> re.Pattern:
    """Compile *keywords* into one regex that matches them as whole tokens.

    Plain substring tests misfire on real names ("CHASE" inside "PURCHASE",
    "US BANK" inside "COLUMBUS BANK", "HOA" inside "HOANG"). Each keyword must
    therefore start and end on a non-alphanumeric boundary; an optional
    trailing "S" keeps simple plurals such as "CONDOMINIUMS" matching.
    """
    alternatives = "|".join(
        re.escape(kw.strip()) for kw in sorted(keywords, key=len, reverse=True)
    )
    return re.compile(rf"(?<![A-Z0-9])(?:{alternatives})S?(?![A-Z0-9])")


# Ordered rules: the first pattern that matches wins, so the more specific
# categories (MBS trustee) come before the broader ones (national bank).
# Each entry: (pattern, type, category, is_original_loan_holder, note).
_PLAINTIFF_RULES = (
    (_compile_keywords(_TRUSTEE_KEYWORDS), PLAINTIFF_TYPE_TRUSTEE, "mbs_trustee", False,
     "MBS trustee: el loan fue securitizado. El servicer real puede ser otra entity."),
    (_compile_keywords(_IRS_KEYWORDS), PLAINTIFF_TYPE_IRS, "tax_authority", False,
     "IRS federal tax lien. SOBREVIVE el foreclosure con 120-day right of redemption."),
    (_compile_keywords(_STATE_TAX_KEYWORDS), PLAINTIFF_TYPE_STATE_TAX, "tax_authority", False,
     "FL state tax lien. Tipicamente extinguible pero verificar con title search."),
    (_compile_keywords(_GSE_KEYWORDS), PLAINTIFF_TYPE_GSE, "primary_lender", False,
     "GSE (Fannie/Freddie/Ginnie). Compraron el loan al originador. Comun en MLS post-foreclosure como REO."),
    (_compile_keywords(_BANK_NATIONAL_KEYWORDS), PLAINTIFF_TYPE_BANK_NATIONAL, "primary_lender", True,
     "Banco nacional grande. Probablemente originador del loan. Procesos estandarizados."),
    (_compile_keywords(_BANK_REGIONAL_KEYWORDS), PLAINTIFF_TYPE_BANK_REGIONAL, "primary_lender", True,
     "Banco regional. Mas flexible para negociar short sale o cash-for-keys."),
    (_compile_keywords(_CREDIT_UNION_KEYWORDS), PLAINTIFF_TYPE_CREDIT_UNION, "primary_lender", True,
     "Credit union. Members-only, foreclosure menos frecuente, mas dispuestos a workout."),
    (_compile_keywords(_NONBANK_MORTGAGE_KEYWORDS), PLAINTIFF_TYPE_NONBANK_MORTGAGE, "primary_lender", False,
     "Non-bank mortgage servicer. Suele ser servicer asignado, no el originador. Investor real es otro."),
    (_compile_keywords(_HOA_KEYWORDS), PLAINTIFF_TYPE_HOA, "association", False,
     "HOA/Condo association. FL Statute 720.3085(2)(b) limita lo que sobrevive a 12 meses dues o 1% del mortgage."),
    (_compile_keywords(_MUNICIPAL_KEYWORDS), PLAINTIFF_TYPE_MUNICIPAL, "government", False,
     "Municipal lien (code enforcement / utility). SOBREVIVE el foreclosure — corre con la tierra."),
)

# "LLC" as a standalone token (not the letters inside e.g. "WELLCARE").
_LLC_PATTERN = re.compile(r"(?<![A-Z0-9])(?:LLC|L\.L\.C\.?)(?![A-Z0-9])")

# Tokens that mark a name as a legal entity rather than a person. BANK/TRUST
# may be the tail of a longer word ("FIRSTBANK") but not its head ("BANKS").
_ENTITY_MARKER_PATTERN = re.compile(
    r"(?<![A-Z0-9])(?:INC|CORP|CORPORATION|LLC|LTD|LLP|LP|COMPANY)(?![A-Z0-9])"
    r"|(?:BANK|TRUST)(?![A-Z0-9])"
)


def classify_plaintiff(name: Optional[str]) -> dict:
    """Classify the plaintiff of a foreclosure from its name.

    Args:
        name: Plaintiff name as it appears in the record; may be None or blank.

    Returns:
        A dict with the keys:
            name: the input name (None when the input is empty or blank).
            type: one of the PLAINTIFF_TYPE_* constants.
            category: "primary_lender", "mbs_trustee", "tax_authority",
                "association", "government" or "unknown".
            is_original_loan_holder: True, False, or None when it cannot be
                determined.
            note: human-readable explanation (absent for empty input).
    """
    if not name or not name.strip():
        return {
            "name": None,
            "type": PLAINTIFF_TYPE_UNKNOWN,
            "category": "unknown",
            "is_original_loan_holder": None,
        }

    upper = name.upper()
    for pattern, plaintiff_type, category, holds_original, note in _PLAINTIFF_RULES:
        if pattern.search(upper):
            return {
                "name": name,
                "type": plaintiff_type,
                "category": category,
                "is_original_loan_holder": holds_original,
                "note": note,
            }

    # An LLC with no known bank/servicer keyword is probably hard money.
    if _LLC_PATTERN.search(upper):
        return {
            "name": name,
            "type": PLAINTIFF_TYPE_HARD_MONEY,
            "category": "primary_lender",
            "is_original_loan_holder": True,
            "note": "LLC sin patron de banco/servicer conocido. Probable hard money lender o private investor. Tasas 8-15%, terminos cortos.",
        }

    # "LASTNAME, FIRSTNAME" or a short name counts as an individual only when
    # it carries no entity marker; "X HOLDINGS, INC." is not a person.
    looks_like_person = "," in name or len(name.split()) <= 4
    if looks_like_person and not _ENTITY_MARKER_PATTERN.search(upper):
        return {
            "name": name,
            "type": PLAINTIFF_TYPE_PRIVATE,
            "category": "primary_lender",
            "is_original_loan_holder": None,
            "note": "Individuo (no entity). Posible seller financing, family loan, o private money.",
        }

    return {
        "name": name,
        "type": PLAINTIFF_TYPE_OTHER,
        "category": "unknown",
        "is_original_loan_holder": None,
        "note": "Nombre no matchea patrones conocidos. Revisar manualmente.",
    }
# ═══════════════════════════════════════════════════════════════════════════
# Lien types + survival analysis
# ═══════════════════════════════════════════════════════════════════════════
# Tipos de lien (sigue convencion estandar US real estate)
LIEN_TYPE_MORTGAGE_1ST = "MORTGAGE_1ST"
LIEN_TYPE_MORTGAGE_2ND = "MORTGAGE_2ND"
LIEN_TYPE_MORTGAGE_3RD = "MORTGAGE_3RD"
LIEN_TYPE_HELOC = "HELOC"
LIEN_TYPE_IRS_TAX = "IRS_TAX_LIEN"
LIEN_TYPE_STATE_TAX = "STATE_TAX_LIEN"
LIEN_TYPE_PROPERTY_TAX = "PROPERTY_TAX_LIEN"  # County property tax delinquency
LIEN_TYPE_HOA = "HOA_LIEN"
LIEN_TYPE_MECHANICS = "MECHANICS_LIEN"
LIEN_TYPE_MUNICIPAL = "MUNICIPAL_LIEN"  # Code enforcement, utility
LIEN_TYPE_JUDGMENT = "JUDGMENT_LIEN"
LIEN_TYPE_OTHER = "OTHER"

# Survival outcomes
SURVIVES = "SURVIVES"
EXTINGUISHED = "EXTINGUISHED"
EXTINGUISHED_BY_THIS_ACTION = "EXTINGUISHED_BY_THIS_ACTION"  # plaintiff's own mortgage
UNCERTAIN = "UNCERTAIN"

# Lien types whose fate depends only on being senior or junior to the plaintiff.
_CHRONOLOGICAL_LIEN_TYPES = (
    LIEN_TYPE_MORTGAGE_2ND, LIEN_TYPE_MORTGAGE_3RD, LIEN_TYPE_HELOC,
    LIEN_TYPE_MECHANICS, LIEN_TYPE_JUDGMENT,
)

# Date layouts accepted for the filing-date arguments (ISO and US style).
_FILING_DATE_FORMATS = ("%Y-%m-%d", "%m/%d/%Y")


def _filing_ordinal(value: Optional[str]) -> Optional[int]:
    """Return the proleptic ordinal day of a filing date, or None if unparseable."""
    if not value or not value.strip():
        return None
    # Drop any time component ("2021-03-01T10:00", "3/1/2021 10:00").
    day_text = re.split(r"[T\s]", value.strip(), maxsplit=1)[0]
    for fmt in _FILING_DATE_FORMATS:
        try:
            return datetime.strptime(day_text, fmt).toordinal()
        except ValueError:
            continue
    return None


def _lien_verdict(outcome: str, warning: Optional[str], legal_basis: str) -> dict:
    """Build the result dict returned by analyze_lien_survival."""
    return {
        "survives_foreclosure": outcome,
        "warning": warning,
        "legal_basis": legal_basis,
    }


def analyze_lien_survival(
    *,
    lien_type: str,
    is_plaintiff_lien: bool = False,
    is_senior_to_plaintiff: bool = False,
    lien_filing_date: Optional[str] = None,
    plaintiff_filing_date: Optional[str] = None,
) -> dict:
    """Decide whether a lien survives a judicial foreclosure in Florida.

    Rules encoded here:
        - The plaintiff's own lien: EXTINGUISHED_BY_THIS_ACTION.
        - Property tax, IRS tax, municipal and HOA liens: SURVIVES regardless
          of filing order (see the warning text for the caveats of each).
        - State tax liens, junior mortgages, HELOCs, mechanics and judgment
          liens, and a non-plaintiff first mortgage: SURVIVES when senior to
          the plaintiff, EXTINGUISHED otherwise.
        - Any other type: UNCERTAIN.

    Args:
        lien_type: One of the LIEN_TYPE_* constants.
        is_plaintiff_lien: True when this lien is the one being foreclosed.
        is_senior_to_plaintiff: True when the lien has priority over the
            plaintiff's lien.
        lien_filing_date: Filing date of this lien ("YYYY-MM-DD" or
            "MM/DD/YYYY").
        plaintiff_filing_date: Filing date of the plaintiff's lien, same
            formats.

    A lien is treated as senior when ``is_senior_to_plaintiff`` is True OR when
    both dates parse and the lien was filed strictly before the plaintiff's.
    Missing or unparseable dates never make a lien senior.

    Returns:
        Dict with ``survives_foreclosure`` (SURVIVES | EXTINGUISHED |
        EXTINGUISHED_BY_THIS_ACTION | UNCERTAIN), ``warning`` (str or None)
        and ``legal_basis`` (str).
    """
    # The foreclosing lien itself is wiped by the sale it triggers.
    if is_plaintiff_lien:
        return _lien_verdict(
            EXTINGUISHED_BY_THIS_ACTION,
            None,
            "Plaintiff's own mortgage is the subject of this foreclosure — extinguished by judicial sale.",
        )

    # Liens that survive regardless of filing order.
    if lien_type == LIEN_TYPE_PROPERTY_TAX:
        return _lien_verdict(
            SURVIVES,
            "Property tax delinquency tiene SUPER-PRIORIDAD. Sobrevive a TODO. Pagar inmediatamente post-cierre o el tax collector vende el property por tax deed.",
            "FL Statute 197.122 — ad valorem taxes constitute first lien superior to all other liens.",
        )
    if lien_type == LIEN_TYPE_IRS_TAX:
        return _lien_verdict(
            SURVIVES,
            "IRS federal tax lien SOBREVIVE el foreclosure. El IRS tiene 120-day right of redemption (puede recomprar el property pagando el bid + costos). Despues de 120 dias, el buyer queda dueno definitivamente. Sumar al MAB.",
            "26 USC 7425(d) — federal tax liens survive judicial sale with 120-day redemption period.",
        )
    if lien_type == LIEN_TYPE_MUNICIPAL:
        return _lien_verdict(
            SURVIVES,
            "Lien municipal (code enforcement / utility) SOBREVIVE — corre con la tierra. Sumar al MAB. Check code enforcement violations open antes de bidear.",
            "FL Statute 162.09 — code enforcement liens equivalent to civil judgment, runs with land.",
        )
    if lien_type == LIEN_TYPE_HOA:
        return _lien_verdict(
            SURVIVES,
            "HOA dues SOBREVIVE pero limitado por FL Statute 720.3085(2)(b): el nuevo owner debe 12 meses de dues o 1% del original mortgage, lo que sea menor. Si es condo: FL 718.116. Pedir HOA estoppel letter pre-bid.",
            "FL Statute 720.3085(2)(b) (HOA) o 718.116 (condo) — buyer hereda capped portion.",
        )

    # Everything below depends on priority. The explicit flag wins; otherwise
    # fall back to comparing the two filing dates when both are usable.
    lien_day = _filing_ordinal(lien_filing_date)
    plaintiff_day = _filing_ordinal(plaintiff_filing_date)
    senior = is_senior_to_plaintiff or (
        lien_day is not None and plaintiff_day is not None and lien_day < plaintiff_day
    )

    if lien_type == LIEN_TYPE_STATE_TAX:
        if senior:
            return _lien_verdict(
                SURVIVES,
                "State tax lien filed BEFORE plaintiff's mortgage → sobrevive.",
                "FL Statute 197.0 — chronological priority among non-super-priority liens.",
            )
        return _lien_verdict(
            EXTINGUISHED,
            "State tax lien filed AFTER plaintiff's mortgage típicamente se extingue. Validar con title search.",
            "Junior liens (post-mortgage) extinguished by foreclosure of senior lien.",
        )

    if lien_type in _CHRONOLOGICAL_LIEN_TYPES:
        if senior:
            return _lien_verdict(
                SURVIVES,
                f"{lien_type} filed BEFORE plaintiff's mortgage → sobrevive. Sumar al MAB.",
                "Senior lien sobrevive foreclosure de lien junior.",
            )
        return _lien_verdict(
            EXTINGUISHED,
            None,
            f"{lien_type} junior al plaintiff's mortgage — extinguido por foreclosure judicial.",
        )

    # A first mortgage that is not the plaintiff's: another, senior mortgage exists.
    if lien_type == LIEN_TYPE_MORTGAGE_1ST:
        if senior:
            return _lien_verdict(
                SURVIVES,
                "Existe un mortgage senior al plaintiff's lien. Buyer hereda ESTA hipoteca. Sumar saldo al MAB.",
                "Senior mortgage survives foreclosure of junior lien.",
            )
        return _lien_verdict(
            EXTINGUISHED,
            None,
            "1st mortgage junior al plaintiff (raro pero posible si plaintiff es property tax/super-priority).",
        )

    # Unclassified lien types are never assumed safe.
    return _lien_verdict(
        UNCERTAIN,
        f"Tipo de lien '{lien_type}' no tiene regla automatica. Title search profesional ($300-500) requerido.",
        "Default safety: assume UNCERTAIN para tipos no clasificados.",
    )
def _empty_liens_inventory(reason: str) -> dict:
    """Return the placeholder liens inventory used while the v1.1 scraper is pending.

    The dict has the same keys a populated inventory would have, so downstream
    consumers keep working. ``reason`` is echoed back under
    ``detail_pending_reason``.
    """
    manual_title_search_notice = "".join((
        "⚠️ Liens detail no disponible automaticamente (Wave 1.5A v1.1 deferred a Phase 3.5). ",
        "ANTES de cualquier oferta o bid: hacer **title search profesional** ",
        "($300-500 USD) en or.duvalclerk.com filtering por document types: ",
        "MTG (mortgages), NFTL (IRS federal tax liens), SATL (state tax), ",
        "NOC (mechanics), COD (code enforcement). Listing puede tener hasta ",
        "$30K-$100K en deudas heredables NO visibles aqui.",
    ))

    placeholder: dict = {}
    placeholder["all_liens"] = []
    placeholder["lien_count"] = 0
    placeholder["total_surviving_debt"] = 0
    placeholder["investor_warning"] = manual_title_search_notice
    placeholder["detail_status"] = "PENDING_V1_1"
    placeholder["detail_pending_reason"] = reason
    return placeholder
# URL del clerk por condado (para soft-fail mensajes en condados no implementados)
# Maps a county name (without the "County" suffix) to its clerk's records URL.
# fetch_court_records() returns the URL as the manual-lookup pointer for
# counties without an automated scraper.
COUNTY_CLERK_URLS = {
    "Duval": "https://www.duvalclerk.com/online-option/court-records",
    "Miami-Dade": "https://www2.miami-dadeclerk.com/ocs",
    "Broward": "https://officialrecords.broward.org",
    "Palm Beach": "https://www.mypalmbeachclerk.com/departments/records-services-division",
    "Hillsborough": "https://hillsclerk.com",
    "Orange": "https://myorangeclerk.com",
}
def _enable_court_records() -> bool:
    """Return True when the ENABLE_COURT_RECORDS environment variable is "true".

    The comparison is case-insensitive and ignores surrounding whitespace, so
    a value with a stray space or carriage return (e.g. from a CRLF .env file)
    still enables the feature. Unset or any other value means disabled.
    """
    flag = os.getenv("ENABLE_COURT_RECORDS", "false")
    return flag.strip().lower() == "true"
def _rate_limit(domain: str) -> None:
    """Block until at least _RATE_LIMIT_SECONDS have passed since the last call for *domain*.

    The first call for a domain returns immediately. Timestamps are taken from
    the monotonic clock, so a wall-clock adjustment (NTP step, manual change)
    cannot produce a negative elapsed time and an oversized sleep.
    """
    previous = _DOMAIN_LAST_REQUEST.get(domain)
    if previous is not None:
        remaining = _RATE_LIMIT_SECONDS - (time.monotonic() - previous)
        if remaining > 0:
            time.sleep(remaining)
    # Stamp after any wait so the next caller measures from the real request time.
    _DOMAIN_LAST_REQUEST[domain] = time.monotonic()
# ═══════════════════════════════════════════════════════════════════════════
# Duval (Jacksonville) — implementacion completa
# ═══════════════════════════════════════════════════════════════════════════
# Diccionarios para parseo de address (Duval Property Appraiser tiene 5 campos)
_STREET_DIRECTIONS = {"N", "S", "E", "W", "NE", "NW", "SE", "SW",
                      "NORTH", "SOUTH", "EAST", "WEST"}
# Street-type token (long or short form) -> abbreviation used by the search form.
_STREET_SUFFIXES = {
    "ST": "ST", "STREET": "ST",
    "AVE": "AVE", "AVENUE": "AVE",
    "RD": "RD", "ROAD": "RD",
    "BLVD": "BLVD", "BOULEVARD": "BLVD",
    "LN": "LN", "LANE": "LN",
    "WAY": "WAY",
    "DR": "DR", "DRIVE": "DR",
    "CT": "CT", "COURT": "CT",
    "PL": "PL", "PLACE": "PL",
    "CIR": "CIR", "CIRCLE": "CIR",
    "TER": "TER", "TERRACE": "TER",
    "PKWY": "PKWY", "PARKWAY": "PKWY",
    "HWY": "HWY", "HIGHWAY": "HWY",
    "TRL": "TRL", "TRAIL": "TRL",
    "XING": "XING", "CROSSING": "XING",
    "ALY": "ALY", "ALLEY": "ALY",
    "BND": "BND", "BEND": "BND",
}
# Spelled-out directions are normalized to their one-letter form.
_DIRECTION_ABBREVIATIONS = {"NORTH": "N", "SOUTH": "S", "EAST": "E", "WEST": "W"}


def _parse_address_duval(address: str) -> Optional[dict]:
    """Split a street address into the fields of the Duval search form.

    Example: "3245 N Pearl St, Jacksonville, FL 32206" becomes
    {street_num: "3245", prefix: "N", name: "PEARL", suffix: "ST", zip: "32206"}.

    Only the text before the first comma is treated as the street line. The
    line must start with an all-digit house number. A leading direction or a
    trailing street type is split off only when a street name remains, so
    "123 North St" parses as name "NORTH" + suffix "ST" instead of failing.

    Returns:
        The field dict (``prefix``, ``suffix`` and ``zip`` may be ""), or None
        when the address is empty or has no house number plus street name.
    """
    if not address or not address.strip():
        return None

    street_part = address.split(",")[0].strip()
    # Trailing periods ("St.", "N.") would otherwise defeat the table lookups.
    tokens = [t.rstrip(".") for t in street_part.upper().split()]
    tokens = [t for t in tokens if t]
    if len(tokens) < 2 or not tokens[0].isdigit():
        return None
    street_num = tokens[0]
    rest = tokens[1:]

    # ZIP: search only past the house number so a 5-digit house number is not
    # mistaken for the ZIP, and prefer the last candidate (ZIP ends the address).
    after_number = address.lstrip()[len(street_num):]
    zip_candidates = re.findall(r"\b(\d{5})(?:-\d{4})?\b", after_number)
    zip_code = zip_candidates[-1] if zip_candidates else ""

    # Prefix: a leading direction, unless it is itself the street name
    # ("123 NORTH ST" has no prefix; the street is called NORTH).
    prefix = ""
    direction_is_name = len(rest) == 2 and rest[1] in _STREET_SUFFIXES
    if len(rest) >= 2 and rest[0] in _STREET_DIRECTIONS and not direction_is_name:
        prefix = _DIRECTION_ABBREVIATIONS.get(rest[0], rest[0])
        rest = rest[1:]

    # Suffix: a trailing street type, as long as a name is left over.
    suffix = ""
    if len(rest) >= 2 and rest[-1] in _STREET_SUFFIXES:
        suffix = _STREET_SUFFIXES[rest[-1]]
        rest = rest[:-1]

    name = " ".join(rest)
    if not name:
        return None
    return {
        "street_num": street_num,
        "prefix": prefix,
        "name": name,
        "suffix": suffix,
        "zip": zip_code,
    }
def _select_option_best_effort(dropdown, wanted: str) -> None:
    """Select *wanted* in a dropdown by value, then by label; leave it unset if neither works."""
    for criteria in ({"value": wanted}, {"label": wanted}):
        try:
            dropdown.select_option(**criteria)
            return
        except Exception:
            # Best effort by design: an unmatched prefix/suffix stays empty and
            # the search relies on the street-name match instead.
            continue


def _pick_owner_row(
    data_rows: list[list[str]], street_num: str
) -> tuple[Optional[list[str]], bool]:
    """Choose the results row to trust.

    Returns ``(cells, exact)``: the first row with at least 9 cells whose
    street-number cell (index 2) equals *street_num*, with ``exact=True``;
    otherwise the first row if it has at least 9 cells, with ``exact=False``;
    otherwise ``(None, False)``.
    """
    for cells in data_rows:
        if len(cells) >= 9 and cells[2] == street_num:
            return cells, True
    if data_rows and len(data_rows[0]) >= 9:
        return data_rows[0], False
    return None, False


def _fetch_property_owner_duval(address: str) -> tuple[Optional[dict], list[str]]:
    """Duval step 1: look up the owner name and RE# for an address.

    Site: https://paopropertysearch.coj.net/Basic/Search.aspx
    Form fields (ASP.NET, found via DOM inspection):
        - ctl00$cphBody$tbStreetNumber  -> house number
        - ctl00$cphBody$ddStreetPrefix  -> select (N/S/E/W)
        - ctl00$cphBody$tbStreetName    -> street name (no prefix or suffix)
        - ctl00$cphBody$ddStreetSuffix  -> select (ST/AVE/RD/...)
        - ctl00$cphBody$tbZipCode       -> optional ZIP
        - ctl00$cphBody$bSearch         -> submit
    The results land on Results.aspx as a table.

    Returns:
        ``(data, errors)``. ``data`` is None when neither an owner name nor an
        RE# was found; otherwise a dict with owner_name, re_number, year_built,
        tax_assessed_value, last_sale_date, source and result_url. The
        year_built / tax_assessed_value / last_sale_date entries are always
        None here (they live on the detail page, which is not scraped).
        ``errors`` lists every problem or caveat met along the way.
    """
    errors: list[str] = []
    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
    except ImportError as e:
        errors.append(f"playwright no instalado: {e}")
        return None, errors

    parsed = _parse_address_duval(address)
    if not parsed:
        errors.append(
            "No pude parsear el address (formato esperado: "
            f"'NUMERO [prefix] NOMBRE [suffix]'): {address}"
        )
        return None, errors

    _rate_limit("paopropertysearch.coj.net")

    owner_name: Optional[str] = None
    re_number: Optional[str] = None
    # Placeholders: these values are on the detail page (not scraped yet).
    year_built: Optional[int] = None
    tax_assessed: Optional[int] = None
    last_sale_date: Optional[str] = None
    current_url = ""

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(user_agent=USER_AGENT)
                page = context.new_page()
                page.set_default_timeout(15_000)
                # Wait for the network to go idle so the form is fully rendered
                # before it is filled in.
                page.goto("https://paopropertysearch.coj.net/Basic/Search.aspx",
                          wait_until="networkidle", timeout=20_000)

                # Fill the form using the element ids of the Duval ASP.NET page.
                page.locator("#ctl00_cphBody_tbStreetNumber").fill(parsed["street_num"])
                if parsed["prefix"]:
                    _select_option_best_effort(
                        page.locator("#ctl00_cphBody_ddStreetPrefix"), parsed["prefix"]
                    )
                page.locator("#ctl00_cphBody_tbStreetName").fill(parsed["name"])
                if parsed["suffix"]:
                    _select_option_best_effort(
                        page.locator("#ctl00_cphBody_ddStreetSuffix"), parsed["suffix"]
                    )
                if parsed["zip"]:
                    try:
                        page.locator("#ctl00_cphBody_tbZipCode").fill(parsed["zip"])
                    except Exception:
                        pass  # ZIP is optional on the form; search without it.

                # Submit through JS form.submit(). The site's postback helper
                # (WebForm_DoPostBackWithOptions) is not relied on because it
                # was observed not to load on this page.
                submitted = False
                try:
                    page.evaluate("""() => {
                        const form = document.forms[0] || document.querySelector('form');
                        if (!form) throw new Error('no form found');
                        form.action = 'Results.aspx';
                        // ASP.NET espera el button name como input para detectar el click
                        let hidden = document.createElement('input');
                        hidden.type = 'hidden';
                        hidden.name = 'ctl00$cphBody$bSearch';
                        hidden.value = 'Search';
                        form.appendChild(hidden);
                        form.submit();
                    }""")
                    page.wait_for_url("**Results.aspx**", timeout=10_000)
                    submitted = True
                except Exception as e:
                    errors.append(f"Property Appraiser: form submit fallo: {e}")

                try:
                    page.wait_for_load_state("networkidle", timeout=10_000)
                except PlaywrightTimeout:
                    pass  # Parse whatever has loaded so far.
                current_url = page.url

                if not submitted:
                    errors.append(
                        f"Property Appraiser: submit no navego a Results.aspx (URL final: {current_url}). "
                        "El postback via JS form.submit() fallo."
                    )

                # Results.aspx shows a table with the columns:
                # RE #, Name (Last First), Street #, Street Name, Type,
                # Direction, Unit, City, Zip.
                body_text = page.locator("body").inner_text() if submitted else ""
                no_results = (
                    "No Results Found" in body_text
                    or "No information available" in body_text
                )
                if no_results:
                    errors.append(
                        f"Property Appraiser: 'No Results Found' para "
                        f"{parsed['street_num']} {parsed['prefix']} {parsed['name']} {parsed['suffix']}. "
                        "Address probablemente no existe en Duval PA database o esta fuera del condado."
                    )
                elif submitted:
                    try:
                        rows = page.locator("table").first.locator("tr").all()
                        if len(rows) >= 2:
                            # Row 0 holds the headers; the rest are data rows.
                            data_rows = [
                                [(c.text_content() or "").strip() for c in r.locator("td").all()]
                                for r in rows[1:]
                            ]
                            best_row, exact = _pick_owner_row(data_rows, parsed["street_num"])
                            if best_row:
                                re_number = best_row[0] or None
                                owner_name = best_row[1] or None
                                if not exact:
                                    # The first row may belong to a different
                                    # house; surface that instead of hiding it.
                                    errors.append(
                                        "Property Appraiser: ninguna fila coincide exactamente con "
                                        f"street # {parsed['street_num']}; se uso el primer resultado. "
                                        "Verificar owner manualmente."
                                    )
                    except Exception as e:
                        errors.append(f"Property Appraiser: error parseando tabla de resultados: {e}")
            finally:
                # Always release the browser, including on unexpected errors.
                browser.close()
    except Exception as e:
        errors.append(f"Property Appraiser Duval scrape error: {e}")
        return None, errors

    if not owner_name and not re_number:
        return None, errors
    return {
        "owner_name": owner_name,
        "re_number": re_number,
        "year_built": year_built,
        "tax_assessed_value": tax_assessed,
        "last_sale_date": last_sale_date,
        "source": "Duval Property Appraiser (paopropertysearch.coj.net)",
        "result_url": current_url,
    }, errors
# Markers that flag a results row as a lis pendens / foreclosure filing.
# " LP " is space-delimited so it only matches LP as a standalone token.
_LIS_PENDENS_KEYWORDS = ("LIS PENDENS", "LISPENDENS", " LP ", "FORECLOSURE")

# Candidate selectors for the name-search form, tried in order.
_OR_LAST_NAME_SELECTORS = (
    "input[name='lastName']", "input[id='lastName']",
    "input[name='LastName']", "input[id='LastName']",
    "input[name*='last']", "input[id*='last']",
)
_OR_FIRST_NAME_SELECTORS = (
    "input[name='firstName']", "input[id='firstName']",
    "input[name='FirstName']", "input[id='FirstName']",
    "input[name*='first']", "input[id*='first']",
)
_OR_SEARCH_BUTTON_SELECTORS = (
    "input[type='submit'][value*='Search']",
    "button:has-text('Search')",
    "input[type='button'][value*='Search']",
    "#searchButton", "#btnSearch", "button[type='submit']",
)


def _mentions_lis_pendens(text: str) -> bool:
    """Return True if *text* contains a lis pendens keyword.

    The text is padded with spaces so the " LP " token also matches when LP is
    the very first or very last thing in the text.
    """
    padded = f" {text.upper()} "
    return any(keyword in padded for keyword in _LIS_PENDENS_KEYWORDS)


def _parse_lis_pendens_row(cells: list[str], source_url: str) -> Optional[dict]:
    """Turn one results row into a match dict, or None if it is not a lis pendens row.

    The match always has doc_type, all_columns_text and source_url; filing_date
    and instrument_number are added when a cell looks like one.
    """
    if not _mentions_lis_pendens(" ".join(cells)):
        return None
    match = {
        "doc_type": next((c for c in cells if _mentions_lis_pendens(c)), "Lis Pendens"),
        "all_columns_text": cells,
        "source_url": source_url,
    }
    # First cell containing an M/D/YYYY date is taken as the filing date.
    for cell in cells:
        date_m = re.search(r"\d{1,2}/\d{1,2}/\d{4}", cell)
        if date_m:
            match["filing_date"] = date_m.group(0)
            break
    # First cell starting with NNNN-NNNN or 8+ digits is the instrument number.
    for cell in cells:
        inst_m = re.match(r"\d{4,}-\d{4,}", cell) or re.match(r"\d{8,}", cell)
        if inst_m:
            match["instrument_number"] = inst_m.group(0)
            break
    return match


def _first_present(page, selectors):
    """Return the first element on *page* matching any of *selectors*, or None."""
    for selector in selectors:
        found = page.locator(selector)
        if found.count() > 0:
            return found.first
    return None


def _fetch_lis_pendens_duval(owner_name: str, address: str) -> tuple[list[dict], list[str]]:
    """Duval step 2: search Official Records for lis pendens filed against an owner.

    Site: https://or.duvalclerk.com/
    Flow (found via DOM inspection):
        1. The landing page shows a disclaimer; click the accept button
           (id='btnButton').
        2. Go straight to /search/SearchTypeName.
        3. Fill in last name and first name, then submit.
        4. Keep the result rows that mention a lis pendens / foreclosure.

    Args:
        owner_name: Name as given by the Property Appraiser, in
            "LASTNAME FIRSTNAME MIDDLE_INITIAL" order (e.g. "JONES JOHN N").
            The first word is used as last name and the second as first name.
        address: Not used by the lookup; kept for interface compatibility.

    Returns:
        ``(matches, errors)``. Each match has doc_type, all_columns_text and
        source_url, plus filing_date / instrument_number when found. When no
        row matches, a notice starting with "Sin matches" is added to
        ``errors``; it is informational, not a failure.
    """
    errors: list[str] = []
    matches: list[dict] = []

    # A blank name would submit an unfiltered search; refuse it up front. The
    # "Official Records v1" prefix marks the automatic lookup as unavailable.
    if not owner_name or not owner_name.strip():
        errors.append(
            "Official Records v1: owner_name vacio; lookup automatico de lis pendens omitido."
        )
        return matches, errors

    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
    except ImportError as e:
        errors.append(f"playwright no instalado: {e}")
        return matches, errors

    _rate_limit("or.duvalclerk.com")

    # "JONES JOHN N" -> last=JONES, first=JOHN. Non-person owners such as
    # "CITY OF JACKSONVILLE" are split the same way.
    parts = owner_name.strip().split()
    last_name = parts[0]
    first_name = parts[1] if len(parts) > 1 else ""

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(user_agent=USER_AGENT)
                page = context.new_page()
                page.set_default_timeout(15_000)

                # Step 1: accept the disclaimer.
                page.goto("https://or.duvalclerk.com/", wait_until="networkidle", timeout=20_000)
                try:
                    page.locator("#btnButton").click()
                    page.wait_for_load_state("networkidle", timeout=10_000)
                except Exception as e:
                    errors.append(f"Official Records: error aceptando disclaimer: {e}")
                    return matches, errors

                # Step 2: open the name search.
                try:
                    page.goto("https://or.duvalclerk.com/search/SearchTypeName",
                              wait_until="networkidle", timeout=15_000)
                except Exception as e:
                    errors.append(f"Official Records: no pude navegar a SearchTypeName: {e}")
                    return matches, errors

                # Step 3: fill in the name form.
                last_name_input = _first_present(page, _OR_LAST_NAME_SELECTORS)
                if last_name_input is None:
                    # Degrade gracefully: report the manual lookup instead.
                    errors.append(
                        "Official Records v1: Name Search form selectors no encontrados. "
                        "Lis pendens lookup automatico no disponible en este release. "
                        "Lookup manual: https://or.duvalclerk.com/search/SearchTypeName "
                        f"con last_name='{last_name}', first_name='{first_name}'"
                    )
                    return matches, errors
                last_name_input.fill(last_name)

                if first_name:
                    first_name_input = _first_present(page, _OR_FIRST_NAME_SELECTORS)
                    if first_name_input is not None:
                        first_name_input.fill(first_name)

                # Submit: click the first search button that exists, else
                # press Enter in the last-name field.
                clicked = False
                for selector in _OR_SEARCH_BUTTON_SELECTORS:
                    try:
                        if page.locator(selector).count() > 0:
                            page.locator(selector).first.click()
                            clicked = True
                            break
                    except Exception:
                        continue  # Try the next candidate button.
                if not clicked:
                    try:
                        last_name_input.press("Enter")
                        clicked = True
                    except Exception:
                        pass
                if not clicked:
                    errors.append("Official Records: no encontre boton Search ni pude enviar via Enter")
                    return matches, errors

                try:
                    page.wait_for_load_state("networkidle", timeout=15_000)
                except PlaywrightTimeout:
                    pass  # Go on and look for the results table anyway.
                current_url = page.url

                # Step 4: parse the results grid.
                try:
                    page.wait_for_selector("table", timeout=8_000)
                except PlaywrightTimeout:
                    errors.append(f"Official Records: tabla de resultados no apareció. URL: {current_url}")
                    return matches, errors

                for row in page.locator("table tr").all()[1:]:  # skip the header row
                    try:
                        cells = [(c.text_content() or "").strip() for c in row.locator("td").all()]
                    except Exception:
                        continue  # Best effort: skip a row that cannot be read.
                    parsed_row = _parse_lis_pendens_row(cells, current_url)
                    if parsed_row is not None:
                        matches.append(parsed_row)

                if not matches:
                    # Informational only: the property may simply be clean.
                    errors.append(
                        f"Sin matches de Lis Pendens para owner '{owner_name}' en Duval Official Records. "
                        f"Esto puede significar: (a) la propiedad NO esta en foreclosure, o "
                        f"(b) el owner_name parseado no matchea el formato del clerk. URL final: {current_url}"
                    )
                return matches, errors
            finally:
                # Single cleanup point for every exit path above.
                browser.close()
    except Exception as e:
        errors.append(f"Official Records Duval scrape error: {e}")
        return matches, errors
def _lis_pendens_sort_key(match: dict) -> datetime:
    """Return the filing date of a lis pendens match as a datetime for ordering.

    Filing dates are M/D/YYYY strings, which do not sort chronologically as
    text. Missing or malformed dates sort as the oldest possible value.
    """
    try:
        return datetime.strptime(match["filing_date"], "%m/%d/%Y")
    except (KeyError, TypeError, ValueError):
        return datetime.min


def _fetch_duval(address: str) -> dict:
    """Run the full Duval pipeline: owner lookup, lis pendens search, liens placeholder.

    Status values produced:
        LIS_PENDENS_ACTIVE: at least one lis pendens row was found.
        CLEAN: the owner was found AND the lis pendens lookup completed
            without finding anything.
        OWNER_VERIFIED: the owner was found but the lis pendens lookup did not
            run or did not complete, so foreclosure can be neither confirmed
            nor ruled out.
        UNKNOWN: the owner lookup failed.

    The liens inventory is always the empty placeholder (detail deferred to
    v1.1), and ``loan_origin`` is always None.
    """
    errors: list[str] = []
    sources_used: list[str] = []

    # Step 1: owner name from the Property Appraiser.
    owner_data, owner_errors = _fetch_property_owner_duval(address)
    errors.extend(owner_errors)
    if owner_data:
        sources_used.append(owner_data.get("source", "Duval Property Appraiser"))
    owner = owner_data or {}

    # Step 2: lis pendens lookup, only possible with an owner name.
    lp_matches: list[dict] = []
    lp_lookup_ok = False
    if owner.get("owner_name"):
        lp_matches, lp_errors = _fetch_lis_pendens_duval(owner["owner_name"], address)
        errors.extend(lp_errors)
        # The lookup counts as completed when it reported nothing, or only the
        # informational "Sin matches" notice.
        lp_lookup_ok = not lp_errors or "Sin matches" in lp_errors[0]
        if lp_lookup_ok:
            sources_used.append("Duval Official Records (or.duvalclerk.com)")

    # Step 3: liens inventory is deferred to v1.1; return the placeholder.
    liens_data = _empty_liens_inventory(
        reason="Acclaim Land Records scraper deferred to v1.1. Lookup manual disponible."
    )

    # Step 4: plaintiff classification and case number, from the first match.
    plaintiff_info = None
    case_number = None
    most_recent_date = None
    if lp_matches:
        first_lp = lp_matches[0]
        # No dedicated plaintiff column is parsed yet; fall back to the first
        # column of the row.
        plaintiff_name_raw = (
            first_lp.get("plaintiff")
            or (first_lp.get("all_columns_text") or [None])[0]
        )
        plaintiff_info = classify_plaintiff(plaintiff_name_raw)
        case_number = first_lp.get("case_number") or first_lp.get("instrument_number")
        dated = [m for m in lp_matches if m.get("filing_date")]
        if dated:
            most_recent_date = max(dated, key=_lis_pendens_sort_key)["filing_date"]

    # Status: never report CLEAN unless the lis pendens lookup really completed.
    if lp_matches:
        status = "LIS_PENDENS_ACTIVE"
    elif owner_data and lp_lookup_ok:
        status = "CLEAN"
    elif owner_data:
        status = "OWNER_VERIFIED"
    else:
        status = "UNKNOWN"

    return {
        "status": status,
        "county": "Duval",
        "address": address,
        # Property Appraiser data
        "owner_name": owner.get("owner_name"),
        "re_number": owner.get("re_number"),
        "tax_assessed_value": owner.get("tax_assessed_value"),
        "year_built_official": owner.get("year_built"),
        "last_sale_date": owner.get("last_sale_date"),
        # Lis pendens detail
        "lis_pendens": lp_matches,
        "lis_pendens_count": len(lp_matches),
        "most_recent_lis_pendens_date": most_recent_date,
        "case_number": case_number,
        # Plaintiff + liens structured fields
        "plaintiff": plaintiff_info,
        "loan_origin": None,  # to be populated from MTG records in v1.1
        "all_liens": liens_data["all_liens"],
        "lien_count": liens_data["lien_count"],
        "total_surviving_debt": liens_data["total_surviving_debt"],
        "investor_warning": liens_data["investor_warning"],
        "liens_detail_status": liens_data["detail_status"],
        # Meta
        "sources_used": sources_used,
        "errors": errors,
        "fetched_at": datetime.now(timezone.utc).isoformat(),
    }
# ═══════════════════════════════════════════════════════════════════════════
# Public API
# ═══════════════════════════════════════════════════════════════════════════
def _county_key(county: str) -> str:
    """Return a comparison key for a county name: lowercase, hyphens and runs of spaces as one space."""
    return re.sub(r"[\s-]+", " ", county).strip().lower()


def fetch_court_records(
    *,
    address: str,
    county_name: Optional[str] = None,
) -> dict:
    """Public entry point: dispatch the lookup by county, soft-failing otherwise.

    Args:
        address: Street address of the property.
        county_name: County, with or without a "County" suffix, in any letter
            case ("Duval", "duval county", "DUVAL COUNTY").

    Returns:
        A dict whose ``status`` is one of:
            DISABLED: ENABLE_COURT_RECORDS is not "true".
            NOT_IMPLEMENTED: no scraper for this county; ``clerk_url`` and
                ``recommendation`` point to the manual lookup.
            CLEAN | LIS_PENDENS_ACTIVE | OWNER_VERIFIED | UNKNOWN: results of
                the Duval pipeline, which also adds owner_name, re_number,
                tax_assessed_value, year_built_official, last_sale_date,
                lis_pendens and the plaintiff / liens fields.
        Every result carries county, address, sources_used, errors and
        fetched_at.
    """
    fetched_at = datetime.now(timezone.utc).isoformat()

    if not _enable_court_records():
        return {
            "status": "DISABLED",
            "county": county_name,
            "address": address,
            "recommendation": (
                "Court records scraping deshabilitado. Activar ENABLE_COURT_RECORDS=true "
                "en .env para deteccion deterministica de foreclosure / lis pendens."
            ),
            "sources_used": [],
            "errors": [],
            "fetched_at": fetched_at,
        }

    # Normalize the county: drop a "County" suffix in any letter case.
    cn = (county_name or "").strip()
    cn_normalized = re.sub(r"\s+county\b", "", cn, flags=re.IGNORECASE).strip()
    county_key = _county_key(cn_normalized)

    if county_key == "duval":
        return _fetch_duval(address)

    # Soft-fail for counties without a scraper. The clerk URL lookup ignores
    # case and hyphen/space differences ("miami dade" finds "Miami-Dade").
    clerk_url = next(
        (url for county, url in COUNTY_CLERK_URLS.items() if _county_key(county) == county_key),
        "https://www.flclerks.com/",
    )
    county_label = cn_normalized or "(condado no especificado)"
    return {
        "status": "NOT_IMPLEMENTED",
        "county": cn_normalized,
        "address": address,
        "recommendation": (
            f"Court records scraper no implementado para {county_label} todavia. "
            f"Lookup manual en {clerk_url}. Wave 1.5A v1 cubre solo Duval; "
            "Miami-Dade / Broward / Palm Beach / Hillsborough en versiones posteriores."
        ),
        "clerk_url": clerk_url,
        "sources_used": [],
        "errors": [],
        "fetched_at": fetched_at,
    }