1000 lines
45 KiB
Python
1000 lines
45 KiB
Python
"""court_records.py — Deterministic foreclosure detection via county clerk records.

PROBLEM IT SOLVES:
The heuristic system (price_validator.py + property_value.py) only HYPOTHESIZES that a
suspiciously low listing is a foreclosure. To CONFIRM it deterministically we need to
query the county's public court records:
- Lis pendens (notice of foreclosure filing)
- Code enforcement violations + liens
- Tax delinquency

SCOPE OF WAVE 1.5A:
- Only DUVAL (Jacksonville) is implemented in this version
- Other counties: soft-fail with the clerk's URL for manual lookup
- Replication to Miami-Dade / Broward / Palm Beach / Hillsborough in later
  versions IF Duval works end-to-end.

STACK:
- Playwright headless Chromium (local, $0 per query)
- Fallback to Firecrawl if Playwright fails (optional, requires ENABLE_FIRECRAWL=true)
- Identifiable User-Agent + rate limit of 1 request every 2 s per domain
- Cache TTL of 7 days (court proceedings move slowly)

OPT-IN:
ENABLE_COURT_RECORDS=true in .env
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Per-domain rate limiting (one request every 2 seconds, per the scraping rule).
# Maps a domain to the time.time() timestamp of the last request sent to it.
_DOMAIN_LAST_REQUEST: dict[str, float] = dict()
_RATE_LIMIT_SECONDS = 2.0

# Identifiable User-Agent (no spoofing — we are a legitimate service).
USER_AGENT = (
    "AR-House/1.0 "
    "(real estate investment analysis; +https://ar-house.example/contact)"
)

# Counties that have a scraper implemented in this version.
SUPPORTED_COUNTIES = set(("Duval", "duval"))
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Wave 1.5A v1.2: Plaintiff classification + Lien survival analysis
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
# Plaintiff categories (who is suing to foreclose).
PLAINTIFF_TYPE_BANK_NATIONAL = "BANK_NATIONAL"  # Wells Fargo, BofA, Chase, Citi, etc.
PLAINTIFF_TYPE_BANK_REGIONAL = "BANK_REGIONAL"  # Truist, Regions, BB&T, Fifth Third
PLAINTIFF_TYPE_CREDIT_UNION = "CREDIT_UNION"  # Navy Federal, VyStar, etc.
PLAINTIFF_TYPE_NONBANK_MORTGAGE = "NONBANK_MORTGAGE"  # Quicken/Rocket, PHH, Mr. Cooper, Carrington
PLAINTIFF_TYPE_GSE = "GSE"  # Fannie Mae, Freddie Mac, Ginnie Mae
PLAINTIFF_TYPE_TRUSTEE = "TRUSTEE_MBS"  # Deutsche Bank AS Trustee, US Bank NA Trustee (MBS trusts)
PLAINTIFF_TYPE_IRS = "IRS_FEDERAL"  # Internal Revenue Service (federal tax)
PLAINTIFF_TYPE_STATE_TAX = "STATE_TAX"  # FL Dept of Revenue
PLAINTIFF_TYPE_HOA = "HOA_ASSOCIATION"  # Homeowners / Condo association
PLAINTIFF_TYPE_MUNICIPAL = "MUNICIPAL"  # City/County code enforcement, utility liens
PLAINTIFF_TYPE_HARD_MONEY = "HARD_MONEY_LENDER"  # LLC nonbank, private high-rate lender
PLAINTIFF_TYPE_PRIVATE = "PRIVATE_INDIVIDUAL"  # Private investor (named person)
PLAINTIFF_TYPE_OTHER = "OTHER"
PLAINTIFF_TYPE_UNKNOWN = "UNKNOWN"

# Keywords in the plaintiff's name that map it to a category.
_BANK_NATIONAL_KEYWORDS = (
    "WELLS FARGO", "BANK OF AMERICA", "CHASE", "JPMORGAN", "JP MORGAN",
    "CITIBANK", "CITI ", "CITI,", "U.S. BANK", "US BANK", "USBANK",
    "PNC BANK", "TD BANK", "HSBC", "CAPITAL ONE",
)
_BANK_REGIONAL_KEYWORDS = (
    "TRUIST", "REGIONS BANK", "BB&T", "BBT BANK", "FIFTH THIRD", "5/3 BANK",
    "SUNTRUST", "M&T BANK", "KEYBANK", "HUNTINGTON",
)
_CREDIT_UNION_KEYWORDS = ("CREDIT UNION", "VYSTAR", "NAVY FEDERAL", "FCU", "C.U.")
_NONBANK_MORTGAGE_KEYWORDS = (
    "QUICKEN", "ROCKET MORTGAGE", "PHH MORTGAGE", "MR. COOPER", "MR COOPER",
    "NATIONSTAR", "CARRINGTON", "FREEDOM MORTGAGE", "LOANDEPOT",
    "PENNYMAC", "NEW REZ", "NEWREZ", "SHELLPOINT", "OCWEN", "DITECH",
    "BAYVIEW", "SPECIALIZED LOAN SERVICING",
)
_GSE_KEYWORDS = ("FANNIE MAE", "FEDERAL NATIONAL MORTGAGE", "FNMA",
                 "FREDDIE MAC", "FEDERAL HOME LOAN MORTGAGE", "FHLMC",
                 "GINNIE MAE", "GNMA")
_TRUSTEE_KEYWORDS = ("AS TRUSTEE", "AS INDENTURE TRUSTEE", "TRUSTEE FOR",
                     "DEUTSCHE BANK NATIONAL", "BANK OF NEW YORK MELLON", "BNY MELLON",
                     "WILMINGTON TRUST", "WILMINGTON SAVINGS")
_IRS_KEYWORDS = ("INTERNAL REVENUE SERVICE", "I.R.S.", "U.S. INTERNAL REVENUE",
                 "UNITED STATES OF AMERICA")
_STATE_TAX_KEYWORDS = ("FLORIDA DEPARTMENT OF REVENUE", "FL DEPT OF REVENUE",
                       "STATE OF FLORIDA")
_HOA_KEYWORDS = ("HOMEOWNERS", "ASSOCIATION INC", "ASSOCIATION, INC", "CONDOMINIUM",
                 "PROPERTY OWNERS ASSOCIATION", " POA ", "HOA")
_MUNICIPAL_KEYWORDS = ("CITY OF ", "COUNTY OF ", "MUNICIPALITY", "CODE ENFORCEMENT",
                       "TAX COLLECTOR")


def _compile_plaintiff_keywords(keywords: tuple) -> "re.Pattern":
    """Compile a keyword tuple into one regex anchored at word starts.

    Every keyword must begin at a word boundary, so "CHASE" no longer fires
    inside "PURCHASE" nor "HOA" inside "SHOAL". Short acronyms (4 characters
    or fewer) and keywords that were written with a trailing delimiter
    ("CITI ", "CITY OF ") must also end at a word boundary; longer keywords
    may be followed by more letters so plurals such as "CONDOMINIUMS" still hit.
    """
    alternatives = []
    for keyword in keywords:
        core = keyword.strip(" ,")
        pattern = r"(?<![A-Z0-9])" + re.escape(core)
        if len(core) <= 4 or keyword != keyword.rstrip(" ,"):
            pattern += r"(?![A-Z0-9])"
        alternatives.append(pattern)
    return re.compile("|".join(alternatives))


# Ordered rules: (pattern, type, category, is_original_loan_holder, note).
# Order matters: more specific patterns first (MBS trustee before national
# bank, because trustee names usually contain a bank name as well).
_PLAINTIFF_RULES = (
    (_compile_plaintiff_keywords(_TRUSTEE_KEYWORDS), PLAINTIFF_TYPE_TRUSTEE, "mbs_trustee", False,
     "MBS trustee: el loan fue securitizado. El servicer real puede ser otra entity."),
    (_compile_plaintiff_keywords(_IRS_KEYWORDS), PLAINTIFF_TYPE_IRS, "tax_authority", False,
     "IRS federal tax lien. SOBREVIVE el foreclosure con 120-day right of redemption."),
    (_compile_plaintiff_keywords(_STATE_TAX_KEYWORDS), PLAINTIFF_TYPE_STATE_TAX, "tax_authority", False,
     "FL state tax lien. Tipicamente extinguible pero verificar con title search."),
    (_compile_plaintiff_keywords(_GSE_KEYWORDS), PLAINTIFF_TYPE_GSE, "primary_lender", False,
     "GSE (Fannie/Freddie/Ginnie). Compraron el loan al originador. Comun en MLS post-foreclosure como REO."),
    (_compile_plaintiff_keywords(_BANK_NATIONAL_KEYWORDS), PLAINTIFF_TYPE_BANK_NATIONAL, "primary_lender", True,
     "Banco nacional grande. Probablemente originador del loan. Procesos estandarizados."),
    (_compile_plaintiff_keywords(_BANK_REGIONAL_KEYWORDS), PLAINTIFF_TYPE_BANK_REGIONAL, "primary_lender", True,
     "Banco regional. Mas flexible para negociar short sale o cash-for-keys."),
    (_compile_plaintiff_keywords(_CREDIT_UNION_KEYWORDS), PLAINTIFF_TYPE_CREDIT_UNION, "primary_lender", True,
     "Credit union. Members-only, foreclosure menos frecuente, mas dispuestos a workout."),
    (_compile_plaintiff_keywords(_NONBANK_MORTGAGE_KEYWORDS), PLAINTIFF_TYPE_NONBANK_MORTGAGE, "primary_lender", False,
     "Non-bank mortgage servicer. Suele ser servicer asignado, no el originador. Investor real es otro."),
    (_compile_plaintiff_keywords(_HOA_KEYWORDS), PLAINTIFF_TYPE_HOA, "association", False,
     "HOA/Condo association. FL Statute 720.3085(2)(b) limita lo que sobrevive a 12 meses dues o 1% del mortgage."),
    (_compile_plaintiff_keywords(_MUNICIPAL_KEYWORDS), PLAINTIFF_TYPE_MUNICIPAL, "government", False,
     "Municipal lien (code enforcement / utility). SOBREVIVE el foreclosure — corre con la tierra."),
)

# Whole words that mark a name as a legal entity rather than a person.
_ENTITY_MARKER_WORDS = frozenset({
    "INC", "INCORPORATED", "CORP", "CORPORATION", "BANK", "BANCORP",
    "LLC", "TRUST", "TRUSTEE", "TRUSTEES",
})
_NAME_WORD_RE = re.compile(r"[A-Z0-9&]+")


def classify_plaintiff(name: Optional[str]) -> dict:
    """Classify a foreclosure plaintiff by its name.

    Matching is case-insensitive and keyword based; keywords only match at
    word starts (see ``_compile_plaintiff_keywords``). Rules are tried in a
    fixed order and the first hit wins.

    Args:
        name: Plaintiff name as it appears in the record. ``None``, empty or
            whitespace-only input is reported as unknown.

    Returns:
        A dict with:
            name: the input name (``None`` when the input was empty/blank)
            type: one of the PLAINTIFF_TYPE_* constants
            category: "primary_lender" | "mbs_trustee" | "tax_authority" |
                "association" | "government" | "unknown"
            is_original_loan_holder: bool, or None when it cannot be determined
            note: human-readable explanation (absent for empty/blank input)
    """
    if not name or not name.strip():
        return {
            "name": None,
            "type": PLAINTIFF_TYPE_UNKNOWN,
            "category": "unknown",
            "is_original_loan_holder": None,
        }

    upper = name.upper()

    for pattern, plaintiff_type, category, is_original, note in _PLAINTIFF_RULES:
        if pattern.search(upper):
            return {"name": name, "type": plaintiff_type,
                    "category": category,
                    "is_original_loan_holder": is_original,
                    "note": note}

    # An LLC with no known bank/servicer keyword is probably a hard money
    # lender or a private investor.
    if "LLC" in upper or "L.L.C." in upper:
        return {"name": name, "type": PLAINTIFF_TYPE_HARD_MONEY,
                "category": "primary_lender",
                "is_original_loan_holder": True,
                "note": "LLC sin patron de banco/servicer conocido. Probable hard money lender o private investor. Tasas 8-15%, terminos cortos."}

    # "LASTNAME, FIRSTNAME" or a short name: treat as an individual, but only
    # when no entity marker is present as a whole word. Checking markers first
    # keeps "ACME HOLDINGS, INC" out of this branch, and whole-word matching
    # keeps "VINCENT JOHN" (contains "INC") in it.
    has_entity_marker = any(
        word in _ENTITY_MARKER_WORDS for word in _NAME_WORD_RE.findall(upper)
    )
    if not has_entity_marker and ("," in name or len(name.split()) <= 4):
        return {"name": name, "type": PLAINTIFF_TYPE_PRIVATE,
                "category": "primary_lender",
                "is_original_loan_holder": None,
                "note": "Individuo (no entity). Posible seller financing, family loan, o private money."}

    return {"name": name, "type": PLAINTIFF_TYPE_OTHER,
            "category": "unknown",
            "is_original_loan_holder": None,
            "note": "Nombre no matchea patrones conocidos. Revisar manualmente."}
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Lien types + survival analysis
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
# Lien types (follows the standard US real-estate convention).
LIEN_TYPE_MORTGAGE_1ST = "MORTGAGE_1ST"
LIEN_TYPE_MORTGAGE_2ND = "MORTGAGE_2ND"
LIEN_TYPE_MORTGAGE_3RD = "MORTGAGE_3RD"
LIEN_TYPE_HELOC = "HELOC"
LIEN_TYPE_IRS_TAX = "IRS_TAX_LIEN"
LIEN_TYPE_STATE_TAX = "STATE_TAX_LIEN"
LIEN_TYPE_PROPERTY_TAX = "PROPERTY_TAX_LIEN"  # County property tax delinquency
LIEN_TYPE_HOA = "HOA_LIEN"
LIEN_TYPE_MECHANICS = "MECHANICS_LIEN"
LIEN_TYPE_MUNICIPAL = "MUNICIPAL_LIEN"  # Code enforcement, utility
LIEN_TYPE_JUDGMENT = "JUDGMENT_LIEN"
LIEN_TYPE_OTHER = "OTHER"

# Survival outcomes
SURVIVES = "SURVIVES"
EXTINGUISHED = "EXTINGUISHED"
EXTINGUISHED_BY_THIS_ACTION = "EXTINGUISHED_BY_THIS_ACTION"  # plaintiff's own mortgage
UNCERTAIN = "UNCERTAIN"


def _lien_verdict(outcome: str, warning: Optional[str], legal_basis: str) -> dict:
    """Build the result dict returned by ``analyze_lien_survival``."""
    return {
        "survives_foreclosure": outcome,
        "warning": warning,
        "legal_basis": legal_basis,
    }


# Liens that the rules below treat as surviving regardless of filing order:
# (lien type, warning, legal basis).
_ALWAYS_SURVIVING_LIENS = (
    (
        LIEN_TYPE_PROPERTY_TAX,
        "Property tax delinquency tiene SUPER-PRIORIDAD. Sobrevive a TODO. Pagar inmediatamente post-cierre o el tax collector vende el property por tax deed.",
        "FL Statute 197.122 — ad valorem taxes constitute first lien superior to all other liens.",
    ),
    (
        LIEN_TYPE_IRS_TAX,
        "IRS federal tax lien SOBREVIVE el foreclosure. El IRS tiene 120-day right of redemption (puede recomprar el property pagando el bid + costos). Despues de 120 dias, el buyer queda dueno definitivamente. Sumar al MAB.",
        "26 USC 7425(d) — federal tax liens survive judicial sale with 120-day redemption period.",
    ),
    (
        LIEN_TYPE_MUNICIPAL,
        "Lien municipal (code enforcement / utility) SOBREVIVE — corre con la tierra. Sumar al MAB. Check code enforcement violations open antes de bidear.",
        "FL Statute 162.09 — code enforcement liens equivalent to civil judgment, runs with land.",
    ),
    (
        LIEN_TYPE_HOA,
        "HOA dues SOBREVIVE pero limitado por FL Statute 720.3085(2)(b): el nuevo owner debe 12 meses de dues o 1% del original mortgage, lo que sea menor. Si es condo: FL 718.116. Pedir HOA estoppel letter pre-bid.",
        "FL Statute 720.3085(2)(b) (HOA) o 718.116 (condo) — buyer hereda capped portion.",
    ),
)

# Lien types decided purely by seniority relative to the plaintiff's lien.
_SENIORITY_DECIDED_LIENS = (
    LIEN_TYPE_MORTGAGE_2ND,
    LIEN_TYPE_MORTGAGE_3RD,
    LIEN_TYPE_HELOC,
    LIEN_TYPE_MECHANICS,
    LIEN_TYPE_JUDGMENT,
)


def analyze_lien_survival(
    *,
    lien_type: str,
    is_plaintiff_lien: bool = False,
    is_senior_to_plaintiff: bool = False,
    lien_filing_date: Optional[str] = None,
    plaintiff_filing_date: Optional[str] = None,
) -> dict:
    """Decide whether a lien survives a Florida judicial foreclosure.

    Rules applied, in order:
      - The plaintiff's own lien: EXTINGUISHED_BY_THIS_ACTION.
      - Property tax, IRS tax, municipal and HOA liens: SURVIVES.
      - State tax lien, 2nd/3rd mortgage, HELOC, mechanics, judgment and
        non-plaintiff 1st mortgage: SURVIVES when ``is_senior_to_plaintiff``
        is true, otherwise EXTINGUISHED.
      - Any other lien type: UNCERTAIN.

    Args:
        lien_type: One of the LIEN_TYPE_* constants.
        is_plaintiff_lien: True when this lien is the one being foreclosed.
        is_senior_to_plaintiff: True when the lien has priority over the
            plaintiff's lien.
        lien_filing_date: Accepted but not consulted by this function;
            seniority must be supplied through ``is_senior_to_plaintiff``.
        plaintiff_filing_date: Accepted but not consulted by this function.

    Returns:
        A dict with:
            survives_foreclosure: SURVIVES | EXTINGUISHED |
                EXTINGUISHED_BY_THIS_ACTION | UNCERTAIN
            warning: str or None
            legal_basis: str
    """
    # The foreclosure the plaintiff filed wipes the plaintiff's own mortgage.
    if is_plaintiff_lien:
        return _lien_verdict(
            EXTINGUISHED_BY_THIS_ACTION,
            None,
            "Plaintiff's own mortgage is the subject of this foreclosure — extinguished by judicial sale.",
        )

    # Liens that survive whatever the filing order.
    for surviving_type, warning, legal_basis in _ALWAYS_SURVIVING_LIENS:
        if lien_type == surviving_type:
            return _lien_verdict(SURVIVES, warning, legal_basis)

    # State tax liens depend on chronology.
    if lien_type == LIEN_TYPE_STATE_TAX:
        if not is_senior_to_plaintiff:
            return _lien_verdict(
                EXTINGUISHED,
                "State tax lien filed AFTER plaintiff's mortgage típicamente se extingue. Validar con title search.",
                "Junior liens (post-mortgage) extinguished by foreclosure of senior lien.",
            )
        # NOTE(review): "197.0" does not look like a complete statute section — confirm the citation.
        return _lien_verdict(
            SURVIVES,
            "State tax lien filed BEFORE plaintiff's mortgage → sobrevive.",
            "FL Statute 197.0 — chronological priority among non-super-priority liens.",
        )

    # Standard junior/senior analysis for mortgages, mechanics and judgment liens.
    if lien_type in _SENIORITY_DECIDED_LIENS:
        if not is_senior_to_plaintiff:
            return _lien_verdict(
                EXTINGUISHED,
                None,
                f"{lien_type} junior al plaintiff's mortgage — extinguido por foreclosure judicial.",
            )
        return _lien_verdict(
            SURVIVES,
            f"{lien_type} filed BEFORE plaintiff's mortgage → sobrevive. Sumar al MAB.",
            "Senior lien sobrevive foreclosure de lien junior.",
        )

    # A 1st mortgage that is not the plaintiff's: another, senior mortgage exists.
    if lien_type == LIEN_TYPE_MORTGAGE_1ST:
        if not is_senior_to_plaintiff:
            return _lien_verdict(
                EXTINGUISHED,
                None,
                "1st mortgage junior al plaintiff (raro pero posible si plaintiff es property tax/super-priority).",
            )
        return _lien_verdict(
            SURVIVES,
            "Existe un mortgage senior al plaintiff's lien. Buyer hereda ESTA hipoteca. Sumar saldo al MAB.",
            "Senior mortgage survives foreclosure of junior lien.",
        )

    # Fallback for lien types without an automatic rule.
    return _lien_verdict(
        UNCERTAIN,
        f"Tipo de lien '{lien_type}' no tiene regla automatica. Title search profesional ($300-500) requerido.",
        "Default safety: assume UNCERTAIN para tipos no clasificados.",
    )
|
|
|
|
|
|
def _empty_liens_inventory(reason: str) -> dict:
    """Return the standard empty liens structure used while the v1.1 scraper is not ready.

    Keeps the output shape stable so downstream consumers do not break.

    Args:
        reason: Stored under ``detail_pending_reason`` to explain why the
            lien detail is missing.
    """
    manual_search_warning = "".join((
        "⚠️ Liens detail no disponible automaticamente (Wave 1.5A v1.1 deferred a Phase 3.5). ",
        "ANTES de cualquier oferta o bid: hacer **title search profesional** ",
        "($300-500 USD) en or.duvalclerk.com filtering por document types: ",
        "MTG (mortgages), NFTL (IRS federal tax liens), SATL (state tax), ",
        "NOC (mechanics), COD (code enforcement). Listing puede tener hasta ",
        "$30K-$100K en deudas heredables NO visibles aqui.",
    ))
    return dict(
        all_liens=[],
        lien_count=0,
        total_surviving_debt=0,
        investor_warning=manual_search_warning,
        detail_status="PENDING_V1_1",
        detail_pending_reason=reason,
    )
|
|
|
|
# Clerk URL per county (used in soft-fail messages for counties without a scraper).
COUNTY_CLERK_URLS = dict(
    [
        ("Duval", "https://www.duvalclerk.com/online-option/court-records"),
        ("Miami-Dade", "https://www2.miami-dadeclerk.com/ocs"),
        ("Broward", "https://officialrecords.broward.org"),
        ("Palm Beach", "https://www.mypalmbeachclerk.com/departments/records-services-division"),
        ("Hillsborough", "https://hillsclerk.com"),
        ("Orange", "https://myorangeclerk.com"),
    ]
)
|
|
|
|
|
|
def _enable_court_records() -> bool:
    """Return True when the court-records lookup is opted in.

    Reads the ENABLE_COURT_RECORDS environment variable on every call. The
    value is compared case-insensitively against "true" after stripping
    surrounding whitespace, so a stray space or a carriage return left by a
    CRLF-terminated .env file does not silently disable the feature.
    """
    raw_flag = os.getenv("ENABLE_COURT_RECORDS", "false")
    return raw_flag.strip().lower() == "true"
|
|
|
|
|
|
def _rate_limit(domain: str) -> None:
    """Block until at least ``_RATE_LIMIT_SECONDS`` have passed since the last call for *domain*.

    The time of this call is then recorded in ``_DOMAIN_LAST_REQUEST``.

    Timestamps come from the wall clock (``time.time()``), which can step
    backwards (e.g. an NTP correction). The wait is therefore clamped to one
    rate-limit interval: without the clamp a recorded timestamp that lies in
    the future would make the sleep as long as the clock jump.
    """
    elapsed = time.time() - _DOMAIN_LAST_REQUEST.get(domain, 0)
    remaining = _RATE_LIMIT_SECONDS - elapsed
    if remaining > 0:
        time.sleep(min(remaining, _RATE_LIMIT_SECONDS))
    _DOMAIN_LAST_REQUEST[domain] = time.time()
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Duval (Jacksonville) — implementacion completa
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
# Lookup tables for address parsing (the Duval Property Appraiser form has 5 fields).
_STREET_DIRECTIONS = {"N", "S", "E", "W", "NE", "NW", "SE", "SW",
                      "NORTH", "SOUTH", "EAST", "WEST"}
_STREET_SUFFIXES = {
    "ST": "ST", "STREET": "ST",
    "AVE": "AVE", "AVENUE": "AVE",
    "RD": "RD", "ROAD": "RD",
    "BLVD": "BLVD", "BOULEVARD": "BLVD",
    "LN": "LN", "LANE": "LN",
    "WAY": "WAY",
    "DR": "DR", "DRIVE": "DR",
    "CT": "CT", "COURT": "CT",
    "PL": "PL", "PLACE": "PL",
    "CIR": "CIR", "CIRCLE": "CIR",
    "TER": "TER", "TERRACE": "TER",
    "PKWY": "PKWY", "PARKWAY": "PKWY",
    "HWY": "HWY", "HIGHWAY": "HWY",
    "TRL": "TRL", "TRAIL": "TRL",
    "XING": "XING", "CROSSING": "XING",
    "ALY": "ALY", "ALLEY": "ALY",
    "BND": "BND", "BEND": "BND",
}
# Spelled-out directions normalised to the one-letter form.
_DIRECTION_ABBREVIATIONS = {"NORTH": "N", "SOUTH": "S", "EAST": "E", "WEST": "W"}
# 5-digit ZIP, optionally followed by a "-1234" extension (only the 5 digits are captured).
_ZIP_CODE_RE = re.compile(r"\b(\d{5})(?:-\d{4})?\b")


def _parse_address_duval(address: str) -> Optional[dict]:
    """Parse "3245 N Pearl St, Jacksonville, FL 32206" into Duval form fields.

    Only the text before the first comma is treated as the street line. Its
    first token must be an all-digit house number. An optional leading
    direction and an optional trailing street suffix are split off, but never
    when doing so would leave the street name empty (so "100 North St" yields
    name "NORTH", suffix "ST"). Trailing periods ("N.", "St.") are ignored.

    The ZIP is taken from the text after the first comma (last match wins);
    when there is no such text it is looked for after the house number. The
    house number itself is never mistaken for a ZIP, which matters for
    5-digit house numbers.

    Returns:
        {street_num: "3245", prefix: "N", name: "PEARL", suffix: "ST", zip: "32206"}
        (``prefix``, ``suffix`` and ``zip`` are "" when absent), or None when
        the address cannot be parsed.
    """
    if not address or not address.strip():
        return None

    street_part, _, locality_part = address.partition(",")
    street_part = street_part.strip()

    tokens = [token.strip(".") for token in street_part.upper().split()]
    tokens = [token for token in tokens if token]
    if len(tokens) < 2 or not tokens[0].isdigit():
        return None

    zip_candidates = _ZIP_CODE_RE.findall(locality_part)
    if not zip_candidates:
        # No city/state part: search the street line, skipping the house number.
        number_and_tail = street_part.split(None, 1)
        tail = number_and_tail[1] if len(number_and_tail) > 1 else ""
        zip_candidates = _ZIP_CODE_RE.findall(tail)
    zip_code = zip_candidates[-1] if zip_candidates else ""

    street_num = tokens[0]
    rest = tokens[1:]

    # Leading direction: only a prefix when a real street name follows it
    # (something other than a lone suffix).
    prefix = ""
    if rest[0] in _STREET_DIRECTIONS and (
        len(rest) >= 3 or (len(rest) == 2 and rest[1] not in _STREET_SUFFIXES)
    ):
        direction = rest.pop(0)
        prefix = _DIRECTION_ABBREVIATIONS.get(direction, direction)

    # Trailing suffix: only split off when a name remains in front of it.
    suffix = ""
    if len(rest) > 1 and rest[-1] in _STREET_SUFFIXES:
        suffix = _STREET_SUFFIXES[rest.pop()]

    name = " ".join(rest)
    if not name:
        return None

    return {
        "street_num": street_num,
        "prefix": prefix,
        "name": name,
        "suffix": suffix,
        "zip": zip_code,
    }
|
|
|
|
|
|
# Submits the search form directly. A hidden input carrying the submit
# button's name is added so the server sees the postback as a click on Search.
_DUVAL_PA_SUBMIT_JS = """() => {
    const form = document.forms[0] || document.querySelector('form');
    if (!form) throw new Error('no form found');
    form.action = 'Results.aspx';
    // ASP.NET espera el button name como input para detectar el click
    let hidden = document.createElement('input');
    hidden.type = 'hidden';
    hidden.name = 'ctl00$cphBody$bSearch';
    hidden.value = 'Search';
    form.appendChild(hidden);
    form.submit();
}"""


def _select_option_best_effort(page, selector: str, wanted: str) -> bool:
    """Pick *wanted* in the <select> at *selector*, first by value, then by label.

    Returns True when one of the attempts succeeded. Failures are swallowed on
    purpose: an unset dropdown only makes the search less specific.
    """
    for criteria in ({"value": wanted}, {"label": wanted}):
        try:
            page.locator(selector).select_option(**criteria)
            return True
        except Exception:
            continue
    return False


def _table_row_cells(row) -> list[str]:
    """Return the stripped text of every <td> in a Playwright row locator."""
    return [(cell.text_content() or "").strip() for cell in row.locator("td").all()]


def _fetch_property_owner_duval(address: str) -> tuple[Optional[dict], list[str]]:
    """Duval step 1: look up the owner name and RE# for an address in the Property Appraiser.

    Site: https://paopropertysearch.coj.net/Basic/Search.aspx
    Form fields (ASP.NET, found via DOM inspection):
      - ctl00$cphBody$tbStreetNumber → house number
      - ctl00$cphBody$ddStreetPrefix → select (N/S/E/W)
      - ctl00$cphBody$tbStreetName   → street name (without prefix or suffix)
      - ctl00$cphBody$ddStreetSuffix → select (ST/AVE/RD/...)
      - ctl00$cphBody$tbZipCode      → optional ZIP
      - ctl00$cphBody$bSearch        → submit
    Results are read from the first table on Results.aspx.

    Args:
        address: Free-form street address, parsed with ``_parse_address_duval``.

    Returns:
        ``(data, errors)``. ``data`` is None when Playwright is missing, the
        address cannot be parsed, the scrape fails, or neither owner name nor
        RE# was found. Otherwise it is a dict with owner_name, re_number,
        year_built, tax_assessed_value, last_sale_date, source and result_url;
        year_built, tax_assessed_value and last_sale_date are always None
        because the detail page is not scraped. ``errors`` lists every
        problem encountered; the function does not raise for scrape failures.
    """
    errors: list[str] = []
    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
    except ImportError as e:
        errors.append(f"playwright no instalado: {e}")
        return None, errors

    parsed = _parse_address_duval(address)
    if not parsed:
        errors.append(f"No pude parsear el address (formato esperado: '<num> [prefix] <name> [suffix]'): {address}")
        return None, errors

    _rate_limit("paopropertysearch.coj.net")

    owner_name: Optional[str] = None
    re_number: Optional[str] = None
    current_url = ""

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            # try/finally so the browser is closed on every path, including errors.
            try:
                context = browser.new_context(user_agent=USER_AGENT)
                page = context.new_page()
                page.set_default_timeout(15_000)

                # Wait for 'networkidle' so the page's scripts have settled
                # before the form is touched. The form is NOT submitted through
                # the page's own postback helper (WebForm_DoPostBackWithOptions),
                # which was observed not to load reliably on this site; the
                # form is submitted directly further down instead.
                page.goto("https://paopropertysearch.coj.net/Basic/Search.aspx",
                          wait_until="networkidle", timeout=20_000)

                page.locator("#ctl00_cphBody_tbStreetNumber").fill(parsed["street_num"])
                if parsed["prefix"]:
                    # If nothing matches, leave it empty and rely on the street name.
                    _select_option_best_effort(page, "#ctl00_cphBody_ddStreetPrefix", parsed["prefix"])
                page.locator("#ctl00_cphBody_tbStreetName").fill(parsed["name"])
                if parsed["suffix"]:
                    _select_option_best_effort(page, "#ctl00_cphBody_ddStreetSuffix", parsed["suffix"])
                if parsed["zip"]:
                    try:
                        page.locator("#ctl00_cphBody_tbZipCode").fill(parsed["zip"])
                    except Exception:
                        pass  # the ZIP is optional on the form

                submitted = False
                try:
                    page.evaluate(_DUVAL_PA_SUBMIT_JS)
                    page.wait_for_url("**Results.aspx**", timeout=10_000)
                    submitted = True
                except Exception as e:
                    errors.append(f"Property Appraiser: form submit fallo: {e}")

                try:
                    page.wait_for_load_state("networkidle", timeout=10_000)
                except PlaywrightTimeout:
                    pass

                current_url = page.url

                if not submitted:
                    # Only one submit strategy exists (form.submit()), so the
                    # message names that one rather than strategies never tried.
                    errors.append(
                        f"Property Appraiser: submit no navego a Results.aspx (URL final: {current_url}). "
                        "ASP.NET WebForms postback via form.submit() fallo."
                    )

                # Results.aspx has a table with known columns:
                # RE #, Name (Last First), Street #, Street Name, Type, Direction, Unit, City, Zip
                body_text = page.locator("body").inner_text() if submitted else ""
                no_results = "No Results Found" in body_text or "No information available" in body_text

                if no_results:
                    errors.append(
                        f"Property Appraiser: 'No Results Found' para "
                        f"{parsed['street_num']} {parsed['prefix']} {parsed['name']} {parsed['suffix']}. "
                        "Address probablemente no existe en Duval PA database o esta fuera del condado."
                    )
                elif submitted:
                    try:
                        rows = page.locator("table").first.locator("tr").all()
                        if len(rows) >= 2:
                            # Row 0 is the header. Prefer the row whose street
                            # number matches exactly.
                            best_row = None
                            for row in rows[1:]:
                                cells = _table_row_cells(row)
                                if len(cells) >= 9 and cells[2] == parsed["street_num"]:
                                    best_row = cells
                                    break
                            if not best_row:
                                # NOTE(review): falling back to the first row can
                                # return a different property's owner — confirm
                                # this is acceptable for downstream lookups.
                                first_cells = _table_row_cells(rows[1])
                                if len(first_cells) >= 9:
                                    best_row = first_cells
                            if best_row:
                                re_number = best_row[0] or None
                                owner_name = best_row[1] or None
                    except Exception as e:
                        errors.append(f"Property Appraiser: error parseando tabla de resultados: {e}")
            finally:
                browser.close()
    except Exception as e:
        errors.append(f"Property Appraiser Duval scrape error: {e}")
        return None, errors

    if not owner_name and not re_number:
        return None, errors

    return {
        "owner_name": owner_name,
        "re_number": re_number,
        # These live on the detail page, which is not scraped yet.
        "year_built": None,
        "tax_assessed_value": None,
        "last_sale_date": None,
        "source": "Duval Property Appraiser (paopropertysearch.coj.net)",
        "result_url": current_url,
    }, errors
|
|
|
|
|
|
# A row is a lis pendens hit when its text mentions "LIS PENDENS" (with or
# without the space), the standalone code "LP", or "FORECLOSURE". "LP" is
# matched as a whole word so it is found in the first/last column and is not
# found inside names such as "RALPH".
_LIS_PENDENS_RE = re.compile(r"LIS\s*PENDENS|\bLP\b|FORECLOSURE")
_RECORDING_DATE_RE = re.compile(r"\d{1,2}/\d{1,2}/\d{4}")
# Instrument numbers: "NNNN-NNNN" style or a run of 8+ digits, at the start of a cell.
_INSTRUMENT_NUMBER_RE = re.compile(r"\d{4,}-\d{4,}|\d{8,}")


def _lis_pendens_match_from_cells(cells: list[str], source_url: str) -> Optional[dict]:
    """Turn one result row (list of cell texts) into a match dict, or None if it is not a hit."""
    row_text = " ".join(cells).upper()
    if not _LIS_PENDENS_RE.search(row_text):
        return None

    match = {
        "doc_type": next(
            (cell for cell in cells if _LIS_PENDENS_RE.search(cell.upper())),
            "Lis Pendens",
        ),
        "all_columns_text": cells,
        "source_url": source_url,
    }
    # First cell that contains a date becomes the filing date.
    for cell in cells:
        date_found = _RECORDING_DATE_RE.search(cell)
        if date_found:
            match["filing_date"] = date_found.group(0)
            break
    # First cell that starts with an instrument-number pattern.
    for cell in cells:
        instrument_found = _INSTRUMENT_NUMBER_RE.match(cell)
        if instrument_found:
            match["instrument_number"] = instrument_found.group(0)
            break
    return match


def _fetch_lis_pendens_duval(owner_name: str, address: str) -> tuple[list[dict], list[str]]:
    """Duval step 2: search Official Records for lis pendens filed against an owner.

    Site: https://or.duvalclerk.com/
    Flow (found via DOM inspection):
      1. The landing page shows a disclaimer; click the accept button (id='btnButton').
      2. Go directly to /search/SearchTypeName.
      3. Fill last name (first word of ``owner_name``) and first name (second word).
      4. Submit via the first Search button found, or Enter in the last-name input.
      5. Keep the result rows that mention lis pendens / "LP" / foreclosure.

    The Property Appraiser reports owners as "LASTNAME FIRSTNAME MIDDLE_INITIAL"
    (e.g. "JONES JOHN N"); non-person owners such as "CITY OF JACKSONVILLE"
    are split the same way.

    Args:
        owner_name: Owner name as returned by the Property Appraiser.
        address: Not used by this function.

    Returns:
        ``(matches, errors)``. Each match has doc_type, all_columns_text and
        source_url, plus filing_date and instrument_number when they could be
        extracted. When the search ran but found nothing, ``errors`` holds a
        message starting with "Sin matches" — that is informational, not a
        failure. The function does not raise for scrape failures.
    """
    errors: list[str] = []
    matches: list[dict] = []

    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
    except ImportError as e:
        errors.append(f"playwright no instalado: {e}")
        return matches, errors

    _rate_limit("or.duvalclerk.com")

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            # try/finally replaces the per-branch browser.close() calls and
            # also covers unexpected exceptions.
            try:
                context = browser.new_context(user_agent=USER_AGENT)
                page = context.new_page()
                page.set_default_timeout(15_000)

                # Step 1: accept the disclaimer.
                page.goto("https://or.duvalclerk.com/", wait_until="networkidle", timeout=20_000)
                try:
                    page.locator("#btnButton").click()
                    page.wait_for_load_state("networkidle", timeout=10_000)
                except Exception as e:
                    errors.append(f"Official Records: error aceptando disclaimer: {e}")
                    return matches, errors

                # Step 2: open the name search.
                try:
                    page.goto("https://or.duvalclerk.com/search/SearchTypeName",
                              wait_until="networkidle", timeout=15_000)
                except Exception as e:
                    errors.append(f"Official Records: no pude navegar a SearchTypeName: {e}")
                    return matches, errors

                # Step 3: fill the name form. "JONES JOHN N" → last=JONES, first=JOHN.
                parts = owner_name.strip().split()
                last_name = parts[0] if parts else owner_name
                first_name = parts[1] if len(parts) > 1 else ""

                # Candidate selectors for the name inputs, most specific first.
                ln_selectors = [
                    "input[name='lastName']", "input[id='lastName']",
                    "input[name='LastName']", "input[id='LastName']",
                    "input[name*='last']", "input[id*='last']",
                ]
                fn_selectors = [
                    "input[name='firstName']", "input[id='firstName']",
                    "input[name='FirstName']", "input[id='FirstName']",
                    "input[name*='first']", "input[id*='first']",
                ]

                ln_filled = False
                for sel in ln_selectors:
                    if page.locator(sel).count() > 0:
                        page.locator(sel).first.fill(last_name)
                        ln_filled = True
                        break

                if not ln_filled:
                    # Degrade gracefully: no known selector is present on the
                    # page, so point the user at the manual lookup instead.
                    errors.append(
                        "Official Records v1: Name Search form selectors no encontrados. "
                        "Lis pendens lookup automatico no disponible en este release. "
                        "Lookup manual: https://or.duvalclerk.com/search/SearchTypeName "
                        f"con last_name='{last_name}', first_name='{first_name}'"
                    )
                    return matches, errors

                if first_name:
                    for sel in fn_selectors:
                        if page.locator(sel).count() > 0:
                            page.locator(sel).first.fill(first_name)
                            break

                # Submit: first Search button that exists and can be clicked.
                search_btns = [
                    "input[type='submit'][value*='Search']",
                    "button:has-text('Search')",
                    "input[type='button'][value*='Search']",
                    "#searchButton", "#btnSearch", "button[type='submit']",
                ]
                clicked = False
                for sel in search_btns:
                    try:
                        if page.locator(sel).count() > 0:
                            page.locator(sel).first.click()
                            clicked = True
                            break
                    except Exception:
                        continue  # try the next candidate button

                if not clicked:
                    # Fallback: press Enter in the last-name input.
                    try:
                        for sel in ln_selectors:
                            if page.locator(sel).count() > 0:
                                page.locator(sel).first.press("Enter")
                                clicked = True
                                break
                    except Exception:
                        pass

                if not clicked:
                    errors.append("Official Records: no encontre boton Search ni pude enviar via Enter")
                    return matches, errors

                try:
                    page.wait_for_load_state("networkidle", timeout=15_000)
                except PlaywrightTimeout:
                    pass

                current_url = page.url

                # Step 4: parse the result grid.
                try:
                    page.wait_for_selector("table", timeout=8_000)
                except PlaywrightTimeout:
                    errors.append(f"Official Records: tabla de resultados no apareció. URL: {current_url}")
                    return matches, errors

                all_rows = page.locator("table tr").all()
                for row in all_rows[1:]:  # skip header
                    try:
                        cells = [(c.text_content() or "").strip() for c in row.locator("td").all()]
                        match = _lis_pendens_match_from_cells(cells, current_url)
                    except Exception:
                        # Best-effort per row: a row that cannot be read is
                        # skipped so the remaining rows are still examined.
                        continue
                    if match is not None:
                        matches.append(match)

                if not matches:
                    # No matches is NOT a failure — the property may be clean.
                    # The caller in this file looks for the "Sin matches"
                    # prefix in the first error, so the wording must stay.
                    errors.append(
                        f"Sin matches de Lis Pendens para owner '{owner_name}' en Duval Official Records. "
                        f"Esto puede significar: (a) la propiedad NO esta en foreclosure, o "
                        f"(b) el owner_name parseado no matchea el formato del clerk. URL final: {current_url}"
                    )

                return matches, errors
            finally:
                browser.close()

    except Exception as e:
        errors.append(f"Official Records Duval scrape error: {e}")
        return matches, errors
|
|
|
|
|
|
def _fetch_duval(address: str) -> dict:
    """Run the Duval pipeline: owner lookup, lis pendens search, liens inventory.

    The liens inventory is still a placeholder (empty structure plus a
    warning); the real scraper is deferred to v1.1 / Phase 3.5, which is also
    expected to populate ``loan_origin``.

    Args:
        address: Property address to look up.

    Returns:
        A dict with:
          - ``status``: ``LIS_PENDENS_ACTIVE`` when at least one lis pendens
            row matched; ``CLEAN`` when the owner was found and the lis
            pendens lookup completed without matches; ``OWNER_VERIFIED`` when
            the owner was found but the lis pendens lookup was skipped,
            failed or degraded; ``UNKNOWN`` when no owner data was obtained.
          - Property Appraiser fields (``owner_name``, ``re_number``, ...).
          - ``lis_pendens`` matches, their count, the most recent filing
            date and a case/instrument number taken from the first match.
          - ``plaintiff`` classification and placeholder liens fields.
          - ``sources_used``, ``errors`` and ``fetched_at`` (UTC, ISO 8601).
    """
    errors: list[str] = []
    sources_used: list[str] = []

    # Step 1: owner name from the Property Appraiser.
    owner_data, owner_errors = _fetch_property_owner_duval(address)
    errors.extend(owner_errors)
    if owner_data:
        sources_used.append(owner_data.get("source", "Duval Property Appraiser"))

    # Step 2: lis pendens lookup (only possible when we have an owner name).
    lp_matches: list = []
    # True only when the Official Records search actually ran and either
    # reported nothing or reported just the informational "Sin matches" note.
    lp_lookup_ok = False
    if owner_data and owner_data.get("owner_name"):
        lp_matches, lp_errors = _fetch_lis_pendens_duval(
            owner_data["owner_name"], address
        )
        errors.extend(lp_errors)
        lp_lookup_ok = not lp_errors or "Sin matches" in lp_errors[0]
        if lp_lookup_ok:
            sources_used.append("Duval Official Records (or.duvalclerk.com)")

    # Step 3 (Wave 1.5A v1.2): liens inventory is deferred to v1.1, so return
    # a placeholder. Once available, a dedicated liens fetcher is expected to
    # return the full list of liens via doc-type filters on or.duvalclerk.com.
    liens_data = _empty_liens_inventory(
        reason="Acclaim Land Records scraper deferred to v1.1. Lookup manual disponible."
    )

    # Step 4: plaintiff classification (only when a lis pendens was detected).
    plaintiff_info = None
    if lp_matches:
        # Until rows are parsed into named columns, fall back to the first
        # column's text as a best-effort plaintiff name.
        first_lp = lp_matches[0]
        plaintiff_name_raw = (
            first_lp.get("plaintiff")
            or (first_lp.get("all_columns_text") or [None])[0]
        )
        plaintiff_info = classify_plaintiff(plaintiff_name_raw)

    # Status determination.
    # The lookup counts as degraded when it was skipped, failed (timeout or
    # scrape error), or explicitly flagged itself with "Official Records v1".
    # In all those cases we know the owner but cannot confirm the absence of
    # a foreclosure, so the result must not be reported as CLEAN.
    lis_pendens_degraded = (not lp_lookup_ok) or any(
        "Official Records v1" in e for e in errors
    )

    def _filing_date_key(match: dict) -> datetime:
        # Filing dates are M/D/YYYY strings; comparing them as text would
        # rank "12/01/2019" above "01/15/2024", so compare real dates.
        try:
            return datetime.strptime(match["filing_date"], "%m/%d/%Y")
        except (TypeError, ValueError):
            # Unparseable dates sort as the oldest possible value.
            return datetime.min

    most_recent_date = None
    if lp_matches:
        status = "LIS_PENDENS_ACTIVE"
        dated = [m for m in lp_matches if m.get("filing_date")]
        if dated:
            most_recent_date = max(dated, key=_filing_date_key)["filing_date"]
    elif owner_data:
        if lis_pendens_degraded:
            status = "OWNER_VERIFIED"  # PA OK, lis pendens needs manual lookup
        else:
            status = "CLEAN"  # Both PA and lis pendens lookups OK, no matches
    else:
        status = "UNKNOWN"

    # Pull a case number from the first lis pendens match if available,
    # falling back to the instrument number.
    case_number = None
    if lp_matches:
        first_lp = lp_matches[0]
        case_number = (
            first_lp.get("case_number")
            or first_lp.get("instrument_number")
        )

    owner_fields = owner_data or {}
    return {
        "status": status,
        "county": "Duval",
        "address": address,
        # Property Appraiser data
        "owner_name": owner_fields.get("owner_name"),
        "re_number": owner_fields.get("re_number"),
        "tax_assessed_value": owner_fields.get("tax_assessed_value"),
        "year_built_official": owner_fields.get("year_built"),
        "last_sale_date": owner_fields.get("last_sale_date"),
        # Lis pendens detail
        "lis_pendens": lp_matches,
        "lis_pendens_count": len(lp_matches),
        "most_recent_lis_pendens_date": most_recent_date,
        "case_number": case_number,
        # Wave 1.5A v1.2: plaintiff + liens structured fields
        "plaintiff": plaintiff_info,
        "loan_origin": None,  # to be populated from MTG records in v1.1
        "all_liens": liens_data["all_liens"],
        "lien_count": liens_data["lien_count"],
        "total_surviving_debt": liens_data["total_surviving_debt"],
        "investor_warning": liens_data["investor_warning"],
        "liens_detail_status": liens_data["detail_status"],
        # Meta
        "sources_used": sources_used,
        "errors": errors,
        "fetched_at": datetime.now(timezone.utc).isoformat(),
    }
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Public API
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
def fetch_court_records(
    *,
    address: str,
    county_name: Optional[str] = None,
) -> dict:
    """Entry point: dispatch the lookup per county, soft-failing when unsupported.

    Args:
        address: Property address to look up.
        county_name: County name, with or without a trailing "County" suffix
            in any letter case (e.g. "Duval", "Duval County", "DUVAL COUNTY").

    Returns:
        A dict whose ``status`` is one of:
          - ``DISABLED``: the feature flag is off; no lookup was attempted.
          - ``NOT_IMPLEMENTED``: no scraper for this county; ``clerk_url``
            and ``recommendation`` point to a manual lookup.
          - ``LIS_PENDENS_ACTIVE`` / ``CLEAN`` / ``OWNER_VERIFIED`` /
            ``UNKNOWN``: results from the Duval pipeline, which also adds
            owner, lis pendens, plaintiff and liens fields.
        ``CODE_VIOLATIONS`` and ``TAX_DELINQUENT`` are listed as statuses by
        the original contract but are not produced by the Duval pipeline.
        Every result carries ``county``, ``address``, ``sources_used``,
        ``errors`` and ``fetched_at`` (UTC, ISO 8601).
    """
    fetched_at = datetime.now(timezone.utc).isoformat()

    if not _enable_court_records():
        return {
            "status": "DISABLED",
            "county": county_name,
            "address": address,
            "recommendation": (
                "Court records scraping deshabilitado. Activar ENABLE_COURT_RECORDS=true "
                "en .env para deteccion deterministica de foreclosure / lis pendens."
            ),
            "sources_used": [],
            "errors": [],
            "fetched_at": fetched_at,
        }

    # Normalize the county: drop the "County" suffix regardless of letter
    # case, so inputs such as "DUVAL COUNTY" still reach the Duval scraper.
    cn = (county_name or "").strip()
    cn_normalized = re.sub(r"\s+county", "", cn, flags=re.IGNORECASE).strip()

    if cn_normalized.lower() == "duval":
        return _fetch_duval(address)

    # Soft-fail for counties without a scraper: point to the clerk's site.
    clerk_url = COUNTY_CLERK_URLS.get(cn_normalized, "https://www.flclerks.com/")
    return {
        "status": "NOT_IMPLEMENTED",
        "county": cn_normalized,
        "address": address,
        "recommendation": (
            f"Court records scraper no implementado para {cn_normalized} todavia. "
            f"Lookup manual en {clerk_url}. Wave 1.5A v1 cubre solo Duval; "
            "Miami-Dade / Broward / Palm Beach / Hillsborough en versiones posteriores."
        ),
        "clerk_url": clerk_url,
        "sources_used": [],
        "errors": [],
        "fetched_at": fetched_at,
    }
|