# AR-House/pre_screening_orchestrator.py

"""pre_screening_orchestrator.py — Capa 1 análisis rápido (1-2 min).

OBJETIVO:
Antes de gastar 5-7 min en el pipeline completo de 8+ agentes (Capa 2), una
verificación express:
  1. Lookup court records si está disponible para el county (Duval por ahora)
  2. Pre-cálculo en Python de surviving liens según reglas FL
  3. Una llamada al LienPositionAnalyzer para interpretar y dar veredicto

OUTPUT:
{
  "verdict": "GO" | "MAYBE" | "NO-GO" | "INSUFFICIENT_DATA",
  "score": 0-10,
  "reasoning": "string",
  "purchase_price": int,
  "arv": int,
  "rehab_budget": int,
  "surviving_debt_total": int,
  "effective_cost": int,
  "margin_pct": float,
  "red_flags": [str, ...],
  "liens_detected": [...],
  "data_sources_used": [...],
  "elapsed_seconds": float,
  "errors": [str, ...]
}

USO desde Streamlit:
    from pre_screening_orchestrator import run_pre_screening
    result = run_pre_screening(deal=deal_dict_from_db, arv_override=None)
"""
from __future__ import annotations

import json
import os
import time
from typing import Callable, Optional

# ────────────────────────────────────────────────────────────────────────────
# FL lien survival rules — what survives a foreclosure judicial sale
# Lifted from data_fetchers/court_records.py analyze_lien_survival().
# ────────────────────────────────────────────────────────────────────────────

# Canonical lien-type strings. calculate_surviving_debt() lower-cases each
# lien's "type" value and compares it against these constants; a lien with no
# type falls back to LIEN_UNKNOWN.
LIEN_PROPERTY_TAX     = "property_tax"
LIEN_IRS              = "irs_tax"
LIEN_STATE_TAX        = "state_tax"
LIEN_MUNICIPAL        = "municipal"      # code enforcement / utility
LIEN_HOA              = "hoa"
LIEN_MORTGAGE_PRIMARY = "mortgage_primary"
LIEN_MORTGAGE_JUNIOR  = "mortgage_junior"
LIEN_MECHANIC         = "mechanic"
LIEN_JUDGMENT         = "judgment"
LIEN_UNKNOWN          = "unknown"

# Number of months of HOA dues used by the inherited-HOA cap rule in
# calculate_surviving_debt() (original note: FL HOA liability inherited after
# a foreclosure is limited).
_HOA_INHERITED_CAP_MONTHS = 12  # FL 720.3085(2)(b)


def _is_servicer_plaintiff(plaintiff: Optional[str]) -> bool:
    """Heuristic: is the plaintiff a loan servicer (vs an investor)?
    Servicers usually mean the mortgage extinguishes cleanly.
    """
    if not plaintiff:
        return False
    p = plaintiff.upper()
    servicers = [
        "WELLS FARGO", "MR COOPER", "MR. COOPER", "SHELLPOINT", "BSI FINANCIAL",
        "NEWREZ", "PHH MORTGAGE", "SPECIALIZED LOAN SERVICING", "SLS",
        "BANK OF AMERICA", "JPMORGAN", "CHASE", "CITIBANK", "FREEDOM MORTGAGE",
        "DITECH", "OCWEN", "NATIONSTAR", "ROCKET MORTGAGE", "QUICKEN LOANS",
        "PENNYMAC", "LOANDEPOT", "CALIBER HOME LOANS",
    ]
    return any(s in p for s in servicers)


def _parse_lien_amount(raw: object) -> tuple[float, bool]:
    """Coerce a raw lien amount into a number; return (amount, parsed_ok)."""
    if raw is None or raw == "":
        return 0, True
    if isinstance(raw, (int, float)) and not isinstance(raw, bool):
        return raw, True
    try:
        # Accept scraper-style strings such as "$12,500.00".
        value = float(str(raw).replace("$", "").replace(",", "").strip())
    except ValueError:
        return 0, False
    return (int(value) if value.is_integer() else value), True


def calculate_surviving_debt(
    *,
    liens_detected: list[dict],
    deal_type: str,
    plaintiff: Optional[str] = None,
) -> tuple[int, list[dict]]:
    """Sum the dollar amount of liens that SURVIVE the foreclosure/tax-deed sale.

    Args:
        liens_detected: Lien dicts; only the "type" and "amount" keys are read
            here, every other key is passed through untouched. None is treated
            as an empty list.
        deal_type: Kind of sale. "tax_deed" and "foreclosure"/"auction" have
            dedicated rules; any other value is treated as a standard sale in
            which no lien survives. Compared case-insensitively.
        plaintiff: Foreclosure plaintiff name, used to decide whether a
            primary mortgage is being extinguished by its own servicer.

    Returns:
        (total_surviving_dollars, enriched_liens_list). Each enriched lien is
        a copy of the input dict plus `survives` (bool), `survival_reason`
        (str) and `amount_post_cap` (the amount counted for that lien).
    """
    plaintiff_is_servicer = _is_servicer_plaintiff(plaintiff)
    sale_kind = (deal_type or "").strip().lower()
    total = 0
    enriched: list[dict] = []

    for lien in liens_detected or []:
        lien_type = str(lien.get("type") or LIEN_UNKNOWN).lower()
        # Amounts may arrive as strings; adding a str to the running total
        # would raise TypeError and abort the whole pre-screening.
        amount, amount_ok = _parse_lien_amount(lien.get("amount"))
        survives = False
        reason = ""

        if sale_kind == "tax_deed":
            # Tax deed sale extinguishes most liens (paid from surplus)
            # EXCEPT: HOA, municipal, federal liens
            if lien_type in (LIEN_HOA, LIEN_MUNICIPAL, LIEN_IRS):
                survives = True
                reason = f"{lien_type} survives tax deed sale (runs with land)"
            else:
                reason = "Extinguished by tax deed (paid from sale surplus)"

        elif sale_kind in ("foreclosure", "auction"):
            if lien_type == LIEN_PROPERTY_TAX:
                survives = True
                reason = "Property tax has super-priority (FL 197.122)"
            elif lien_type == LIEN_IRS:
                survives = True
                reason = "IRS lien survives w/ 120-day redemption (26 USC 7425(d))"
            elif lien_type == LIEN_MUNICIPAL:
                survives = True
                reason = "Municipal liens run with the land (FL 162.09)"
            elif lien_type == LIEN_HOA:
                survives = True
                # The previous "cap" was min(amount, 12 * (amount / 12)), i.e.
                # the amount itself: nothing was ever capped, and the float
                # round trip plus int() could only lose value. A real cap
                # needs the monthly dues, which a lien record does not carry,
                # so the full amount is counted and the reason says so.
                # NOTE(review): confirm with counsel whether the statutory cap
                # applies to third-party auction buyers at all.
                reason = (
                    f"HOA lien survives; {_HOA_INHERITED_CAP_MONTHS}-month dues cap "
                    "(FL 720.3085(2)(b)) not applied — monthly dues unknown, "
                    "full amount counted"
                )
            elif lien_type == LIEN_MORTGAGE_PRIMARY:
                if plaintiff_is_servicer:
                    reason = f"Plaintiff ({plaintiff}) is loan servicer — primary mortgage extinguishes"
                else:
                    survives = True
                    reason = "Primary mortgage may survive if plaintiff is HOA / municipality"
            elif lien_type == LIEN_MORTGAGE_JUNIOR:
                reason = "Junior mortgages extinguished if properly noticed"
            elif lien_type == LIEN_STATE_TAX:
                survives = True
                reason = "State tax lien survives unless paid from surplus"
            else:
                # Conservative default: assume it survives and flag for review.
                survives = True
                reason = f"{lien_type} survival uncertain — flag for title search"

        else:
            # MLS / REO / unknown deal_type → assume normal title transfer (no inherited debt)
            reason = "Standard sale: title transfers clean (escrow handles payoff)"

        if not amount_ok:
            # Counted as 0 rather than crashing; make the gap visible downstream.
            reason += " [amount unparseable — verify manually]"

        if survives:
            total += amount
        enriched.append({
            **lien,
            "survives": survives,
            "survival_reason": reason,
            "amount_post_cap": amount,
        })

    return total, enriched


# ────────────────────────────────────────────────────────────────────────────
# Main entry point
# ────────────────────────────────────────────────────────────────────────────

# Deal types for which run_pre_screening() treats the court-records lookup as
# mandatory (a deal carrying a case_number is treated the same way).
# Note: "reo" is in this set, but calculate_surviving_debt() has no dedicated
# REO branch and handles it as a standard sale.
DISTRESSED_DEAL_TYPES = {"foreclosure", "auction", "tax_deed", "reo"}


def run_pre_screening(
    *,
    deal: dict,
    arv_override: Optional[int] = None,
    rehab_budget_override: Optional[int] = None,
    # ─── Financial analysis params (added with the 2026-05-15 bug fix) ──
    down_payment: Optional[float] = None,   # cash to put down (default 5% of asking)
    monthly_income: Optional[float] = None, # buyer gross monthly income (for DTI)
    rent_room_monthly: Optional[float] = None,  # house-hack: rent a room
    other_monthly_debts: Optional[float] = None,  # other debts (car, student, cc)
    loan_type: str = "conventional_oo",     # 'fha' | 'conventional_oo' | 'va'
    hoa_monthly_override: Optional[float] = None,  # user-supplied monthly HOA dues
    status_cb: Optional[Callable[[str], None]] = None,
) -> dict:
    """Run the fast pre-screening pass over a scraped deal.

    Steps, in order: property-appraiser (PA) lookup, court-records lookup,
    tax-assessed lookup, comps, price validation, surviving-debt calculation,
    LienPositionAnalyzer call, owner/REO classification, financial analysis,
    and persistence of the result to the property folder. Every external
    fetch is wrapped so that a failure is recorded in ``errors`` and in the
    matching ``*_status`` value instead of aborting the run.

    HARD RULE (2026-05): when deal_type is distressed OR case_number is set,
    court records are mandatory. If the lookup does not end in one of the
    recognised successful statuses, the verdict is forced to
    INSUFFICIENT_DATA regardless of what the analyzer returned.

    BUG FIX 2026-05-15: pre-screening does not inspect photos, so rehab is 0
    by default and labelled "not_assessed_in_prescreening"; a rehab figure is
    only used when the caller passes ``rehab_budget_override``.

    Args:
        deal: Deal dict (from deals_db.list_deals / get_deal_by_id).
        arv_override: Manual ARV; otherwise derived from the deal's
            estimated_arv, the tax-assessed value, or 1.3x the purchase price.
        rehab_budget_override: Manual rehab budget; otherwise 0.
        down_payment, monthly_income, rent_room_monthly, other_monthly_debts,
            loan_type: Forwarded unchanged to the financial analysis.
        hoa_monthly_override: Monthly HOA dues; takes priority over
            deal["hoa_monthly"].
        status_cb: Optional progress callback receiving log lines.

    Returns:
        dict with verdict / score / reasoning / red_flags / players, the
        computed metrics, the raw public data gathered, per-source statuses,
        ``elapsed_seconds``, ``errors`` and ``property_folder``.
    """
    t0 = time.perf_counter()
    errors: list[str] = []
    data_sources: list[str] = []

    def _log(msg: str) -> None:
        if status_cb:
            status_cb(msg)

    # ─── Inputs from deal ──────────────────────────────────────────────────
    deal_type = (deal.get("deal_type") or "").lower()
    purchase_price = (
        deal.get("listing_price")
        or deal.get("starting_bid")
        or 0
    )
    assessed_value = deal.get("estimated_arv") or 0
    arv = arv_override or assessed_value or (int(purchase_price * 1.3) if purchase_price else 0)
    # Evidence-based rehab: only a user override is honoured; otherwise 0 plus
    # an honest status label, because this pass has no photo inspection.
    rehab_budget = rehab_budget_override if rehab_budget_override is not None else 0
    rehab_assessment_status = (
        "user_override" if rehab_budget_override is not None
        else "not_assessed_in_prescreening"
    )
    address = deal.get("address") or ""
    county = deal.get("county") or ""
    state = deal.get("state") or "FL"
    case_number = deal.get("case_number")

    # Hard rule: court records are mandatory for distressed deals or when a
    # case number is present.
    court_records_required = (
        bool(case_number) or deal_type in DISTRESSED_DEAL_TYPES
    )

    _log(f"Pre-screening: {address[:50]} · {deal_type} · ${purchase_price:,.0f}")
    if court_records_required:
        _log(f"  Court records REQUERIDO (case#={bool(case_number)}, distressed={deal_type in DISTRESSED_DEAL_TYPES})")

    # ─── STEP 1: Property Appraiser (source of truth) ─────────────────────
    # The PA record is preferred over listing data for owner, year built,
    # sales history, homestead and value fields used further below.
    pa_record: Optional[dict] = None
    pa_status = "not_run"
    pa_supported = False
    parcel_id_for_pa = deal.get("parcel_id")
    if county and (address or parcel_id_for_pa):
        try:
            from data_fetchers.property_appraiser import fetch_pa_record, is_pa_supported
            pa_supported = is_pa_supported(county, state)
            if not pa_supported:
                pa_status = "not_implemented_for_county"
                _log(f"  PA SKIPPED — adapter no implementado para {county} ({state})")
            else:
                _log(f"  PA lookup (county={county}, parcel={parcel_id_for_pa or '?'})...")
                pa_record = fetch_pa_record(
                    county_name=county,
                    state=state,
                    address=address,
                    parcel_id=parcel_id_for_pa,
                    zip_code=deal.get("zip"),
                    listing_price=float(purchase_price) if purchase_price else None,
                )
                if pa_record and not pa_record.get("errors"):
                    data_sources.append("property_appraiser")
                    pa_status = "found"
                    # `or []`: a sales_history key holding None must not raise
                    # here, otherwise a successful fetch is reported as "error".
                    _log(f"    owner={pa_record.get('owner_name')!r} "
                         f"year_built={pa_record.get('year_built')} "
                         f"homestead={pa_record.get('homestead_active')} "
                         f"sales={len(pa_record.get('sales_history') or [])}")
                elif pa_record and pa_record.get("errors"):
                    pa_status = "error"
                    errors.append(f"PA errors: {pa_record.get('errors')[:2]}")
                    _log(f"    errors: {pa_record.get('errors')[:1]}")
                else:
                    pa_status = "not_found"
                    _log("    no PA data returned")
        except Exception as e:
            pa_status = "error"
            errors.append(f"PA fetch failed: {type(e).__name__}: {e}")
            _log(f"    exception: {e}")
    else:
        pa_status = "missing_inputs"

    # ─── Court records lookup (whenever county + address are known) ───────
    liens_detected: list[dict] = []
    plaintiff: Optional[str] = None
    plaintiff_classified: dict = {}
    court_records_status = "not_executed"
    court_records_data = {}

    if county and address:
        try:
            from data_fetchers.court_records import fetch_court_records, classify_plaintiff
            _log("  Consultando court records...")
            cr_result = fetch_court_records(address=address, county_name=county) or {}
            court_records_data = cr_result
            cr_status = cr_result.get("status", "?")
            court_records_status = cr_status

            # Extract plaintiff (first lis pendens found)
            lis_pendens = cr_result.get("lis_pendens") or []
            if lis_pendens:
                plaintiff = lis_pendens[0].get("plaintiff")
                plaintiff_classified = classify_plaintiff(plaintiff)
                _log(f"    Plaintiff: {plaintiff} | type={plaintiff_classified.get('type')}")

            # Extract liens
            liens_raw = cr_result.get("liens_inventory") or {}
            items = liens_raw.get("items") or []
            for item in items:
                liens_detected.append({
                    "type": item.get("type", "unknown"),
                    "amount": item.get("amount", 0),
                    "creditor": item.get("creditor"),
                    "filed_date": item.get("filed_date"),
                })
            if items or lis_pendens:
                data_sources.append("court_records")
            _log(f"    Status={cr_status} · {len(liens_detected)} liens · {len(lis_pendens)} lis_pendens")
        except Exception as e:
            errors.append(f"court_records error: {type(e).__name__}: {e}")
            court_records_status = "error"
            _log(f"    error: {e}")
    else:
        court_records_status = "missing_inputs"
        _log("  Court records SKIPPED — missing county or address")

    # Sanity check: court records REQUIRED but not successfully run → the
    # verdict is later forced to INSUFFICIENT_DATA. OWNER_VERIFIED counts as
    # a successful execution as well.
    _SUCCESSFUL_COURT_STATUSES = {
        "CLEAN", "LIS_PENDENS_ACTIVE", "CODE_VIOLATIONS", "TAX_DELINQUENT",
        "FORECLOSURE_COMPLETE", "FORECLOSURE_PENDING", "OWNER_VERIFIED",
    }
    court_records_required_but_missing = (
        court_records_required and court_records_status not in _SUCCESSFUL_COURT_STATUSES
    )

    # ─── Tax assessed (Property Appraiser) ──────────────────────────────────
    # The status distinguishes "county not implemented" from "looked up and
    # not found". parcel_id_for_pa was already read from the deal above.
    tax_assessed_data = None
    tax_assessed_status = "not_run"
    if county and (address or parcel_id_for_pa):
        try:
            from data_fetchers.property_value import fetch_tax_assessed, is_tax_assessed_supported
            if not is_tax_assessed_supported(county, state):
                tax_assessed_status = "not_implemented_for_county"
                _log(f"  Tax assessed SKIPPED — scraper no implementado para {county} ({state})")
            else:
                _log(f"  Tax assessed (PA, parcel_id={parcel_id_for_pa!r})...")
                tax_assessed_data = fetch_tax_assessed(
                    address=address, county_name=county, state=state,
                    parcel_id=parcel_id_for_pa,
                )
                if tax_assessed_data and tax_assessed_data.get("assessed_value"):
                    data_sources.append("tax_assessed")
                    tax_assessed_status = "found"
                    # An owner_name key holding None used to raise on the
                    # slice and flip a successful lookup to "error".
                    owner_label = str(tax_assessed_data.get('owner_name') or '?')[:30]
                    _log(f"    assessed=${tax_assessed_data.get('assessed_value', '?')} "
                         f"market=${tax_assessed_data.get('just_value', '?')} "
                         f"year={tax_assessed_data.get('year_built', '?')} "
                         f"sqft={tax_assessed_data.get('sqft', '?')} "
                         f"owner={owner_label}")
                else:
                    tax_assessed_status = "not_found"
                    _log(f"    no data returned for parcel_id={parcel_id_for_pa!r}")
        except Exception as e:
            errors.append(f"tax_assessed error: {type(e).__name__}: {e}")
            tax_assessed_status = "error"
            _log(f"    error: {e}")
    else:
        tax_assessed_status = "missing_inputs"

    # Derive better ARV if tax_assessed found one and user didn't override
    if not arv_override and tax_assessed_data and tax_assessed_data.get("assessed_value"):
        arv_assessed = float(tax_assessed_data["assessed_value"])
        # Use whichever is higher: original arv or tax_assessed-based
        arv = max(arv, arv_assessed)

    # ─── Comparables — PA sales_history is the primary source ──────────────
    # The paid Firecrawl/Zillow lookup is only used when the PA record has no
    # sales history AND ENABLE_FIRECRAWL_COMPS=true.
    comps_data: list[dict] = []
    comps_estimate: Optional[int] = None
    comps_status = "not_run"
    if pa_record and pa_record.get("sales_history"):
        pa_sales = pa_record["sales_history"] or []
        heated_sqft = pa_record.get("sqft_heated")
        # Keep qualified sales only: skip missing/nominal prices (< $5,000)
        # and anything whose "qualified" field starts with "unqualified".
        for s in pa_sales[:5]:
            price = s.get("price")
            if not price or price < 5000:
                continue
            if str(s.get("qualified", "")).lower().startswith("unqualified"):
                continue
            comps_data.append({
                "address": pa_record.get("site_address") or address,
                "sold_price": price,
                "sold_date_text": s.get("date"),
                "sqft": heated_sqft or pa_record.get("sqft_total"),
                "price_per_sqft": round(price / heated_sqft, 0) if heated_sqft else None,
                "source": f"{pa_record.get('county', 'County')} PA (sales history of this property)",
                "deed_type": s.get("deed_type") or s.get("qualification"),
            })
        if comps_data:
            # The first qualifying entry is used as the estimate
            # (presumably sales_history is ordered most-recent first — verify
            # against the PA adapter).
            comps_estimate = comps_data[0].get("sold_price")
            data_sources.append("pa_sales_history")
            comps_status = f"found_{len(comps_data)}_pa_sales"
            _log(f"  Comps via PA sales_history: {len(comps_data)} qualified sales, "
                 f"most recent ${comps_estimate or 0:,}")
        else:
            comps_status = "no_qualified_pa_sales"
            _log("  PA sales_history exists but no qualified arm's-length sales")
    elif os.getenv("ENABLE_FIRECRAWL_COMPS", "false").lower() == "true":
        # FALLBACK path — no PA sales history, use the paid Firecrawl lookup
        try:
            from data_fetchers.property_value import fetch_zillow_comps, estimate_value_from_comps
            beds = deal.get("beds") or 3
            baths = deal.get("baths") or 2
            sqft = deal.get("sqft") or 1500
            zip_code = deal.get("zip")
            if zip_code:
                _log("  Comps FALLBACK Firecrawl Zillow (1 credit, paga)...")
                comps_data, comps_errors = fetch_zillow_comps(
                    zip_code=zip_code,
                    beds=int(beds), baths=float(baths), sqft=int(sqft),
                    max_count=3,
                )
                if comps_data:
                    comps_estimate, _conf = estimate_value_from_comps(comps_data, int(sqft))
                    data_sources.append("zillow_comps")
                    comps_status = f"found_{len(comps_data)}_zillow_comps"
                else:
                    comps_status = "no_comps_found"
            else:
                comps_status = "missing_zip"
        except Exception as e:
            errors.append(f"comps error: {type(e).__name__}: {e}")
            comps_status = "error"
    else:
        comps_status = "not_run_no_pa_no_firecrawl"
        _log("  Comps SKIPPED — no PA sales_history y Firecrawl off")

    # ─── Price validation — PA is the primary source ───────────────────────
    # When the PA record carries just_value_current it is used as the market
    # reference. The paid Firecrawl validator only runs when that is missing
    # AND ENABLE_FIRECRAWL_PRICE_CHECK=true.
    price_validation = None
    price_status = "not_run"
    if purchase_price:
        if pa_record and pa_record.get("just_value_current"):
            pa_market = float(pa_record["just_value_current"])
            listing = float(purchase_price)
            discrepancy_pct = ((listing - pa_market) / pa_market * 100) if pa_market > 0 else 0
            # Status thresholds (original note: same as the legacy validate_price)
            if abs(discrepancy_pct) <= 10:
                status = "NORMAL"
            elif discrepancy_pct < -25:
                status = "CRITICAL_RED_FLAG"  # listing more than 25% below market (possible distress)
            elif discrepancy_pct < -10:
                status = "RED_FLAG"  # listing under-market 10-25%
            elif discrepancy_pct > 25:
                status = "OVERPRICED_SEVERE"
            else:
                status = "OVERPRICED_MODERATE"
            price_validation = {
                "status": status,
                "market_estimate": int(pa_market),
                "listing_price": int(listing),
                "signed_max_discrepancy_pct": round(discrepancy_pct, 1),
                "sources_used": [f"{pa_record.get('county', 'County')} Property Appraiser (just_value)"],
                "confidence": "high",  # PA is the official source
                "method": "pa_just_value",
            }
            data_sources.append("price_validator")
            price_status = status
            _log(f"  Price validation via PA just_value: status={status} "
                 f"market=${pa_market:,.0f} listing=${listing:,.0f} ({discrepancy_pct:+.1f}%)")
        elif os.getenv("ENABLE_FIRECRAWL_PRICE_CHECK", "false").lower() == "true":
            # FALLBACK path — no PA just_value, use the paid Firecrawl validator
            try:
                from data_fetchers.price_validator import validate_price
                _log("  Price validation FALLBACK (Firecrawl Zestimate/Redfin, paga)...")
                price_validation = validate_price(
                    address=address,
                    listing_price=float(purchase_price),
                    tax_assessed_value=tax_assessed_data.get("assessed_value") if tax_assessed_data else None,
                    use_firecrawl=True,
                )
                data_sources.append("price_validator")
                price_status = price_validation.get("status", "?")
                _log(f"    status={price_status}")
            except Exception as e:
                errors.append(f"price_validator error: {type(e).__name__}: {e}")
                price_status = "error"
        else:
            price_status = "not_run_no_pa_no_firecrawl"
            _log("  Price validation SKIPPED — no PA data y Firecrawl off")

    # If foreclosure with final_judgment_amount but no detailed liens, treat it as a primary mortgage lien
    final_judgment = deal.get("final_judgment_amount")
    if deal_type in ("foreclosure", "auction") and final_judgment and not liens_detected:
        liens_detected.append({
            "type": LIEN_MORTGAGE_PRIMARY,
            "amount": int(final_judgment),
            "creditor": plaintiff or "(unknown plaintiff)",
            "filed_date": None,
        })
        data_sources.append("final_judgment_amount")

    # ─── Calculate surviving debt ──────────────────────────────────────────
    surviving_debt, liens_enriched = calculate_surviving_debt(
        liens_detected=liens_detected,
        deal_type=deal_type,
        plaintiff=plaintiff,
    )
    effective_cost = int(purchase_price + surviving_debt + rehab_budget)
    margin_pct = round(((arv - effective_cost) / arv * 100), 1) if arv > 0 else 0.0
    _log(f"  Surviving debt: ${surviving_debt:,} · effective ${effective_cost:,} · margin {margin_pct}%")

    # ─── Build data_sources_status to inform the LLM ───────────────────────
    # tax_assessed / property_appraiser statuses are one of: found | not_found
    #   | not_implemented_for_county | missing_inputs | error | not_run
    data_sources_status = {
        "property_appraiser": pa_status,
        "price_validator": price_status,
        "court_records": court_records_status,
        "comps": comps_status,
        "tax_assessed": tax_assessed_status,
    }

    # ─── Call LienPositionAnalyzer for verdict + reasoning ─────────────────
    _log("  Llamando LienPositionAnalyzer...")
    verdict_data = _call_lien_analyzer(
        deal_type=deal_type,
        purchase_price=int(purchase_price),
        arv=arv,
        rehab_budget=rehab_budget,
        rehab_assessment_status=rehab_assessment_status,
        pa_record=pa_record,
        liens_detected=liens_enriched,
        plaintiff_info={
            "plaintiff_name": plaintiff,
            "plaintiff_type": plaintiff_classified.get("type") if plaintiff_classified else None,
            "plaintiff_category": plaintiff_classified.get("category") if plaintiff_classified else None,
            "plaintiff_note": plaintiff_classified.get("note") if plaintiff_classified else None,
        },
        case_number=case_number,
        court_records_required=court_records_required,
        court_records_required_but_missing=court_records_required_but_missing,
        data_sources_status=data_sources_status,
        pre_computed={
            "total_surviving_debt": surviving_debt,
            "effective_acquisition_cost": effective_cost,
            "margin_vs_arv": margin_pct / 100,
        },
        # Public data passed along so the LLM reasons from facts
        tax_assessed_data=tax_assessed_data,
        price_validation=price_validation,
        comps_data=comps_data,
        comps_estimate=comps_estimate,
    )
    if verdict_data.get("_error"):
        errors.append(verdict_data["_error"])

    # ─── OWNER CLASSIFIER + REO SIGNAL ─────────────────────────────────────
    # Classifies the PA owner and builds the REO (lender-owned) signal.
    owner_classification = None
    reo_signal = None
    if pa_record and pa_record.get("owner_name"):
        try:
            from data_fetchers.owner_classifier import classify_owner, build_reo_signal
            owner_classification = classify_owner(
                pa_record.get("owner_name"),
                co_owners=pa_record.get("co_owners") or [],
            )
            reo_signal = build_reo_signal(
                owner_classification=owner_classification,
                just_value=pa_record.get("just_value_current"),
                assessed_value=pa_record.get("assessed_value_current"),
                listing_price=float(purchase_price) if purchase_price else None,
                taxes_paid_last=pa_record.get("taxes_paid_last"),
                mailing_address=pa_record.get("mailing_address"),
            )
            if reo_signal and reo_signal.get("is_reo_opportunity"):
                # `or 0` / `or {}`: a missing offer bound or classification
                # must not raise inside this log line, which would record a
                # spurious owner_classifier error for a successful run.
                offer_low = reo_signal.get('suggested_offer_low') or 0
                offer_high = reo_signal.get('suggested_offer_high') or 0
                _log(f"  REO OPPORTUNITY: {(owner_classification or {}).get('type')} "
                     f"owner detected. Suggested offer "
                     f"${offer_low:,}-"
                     f"${offer_high:,}")
        except Exception as e:
            errors.append(f"owner_classifier error: {type(e).__name__}: {e}")

    # ─── FINANCIAL ANALYSIS ────────────────────────────────────────────────
    # HOA priority: user override > deal["hoa_monthly"] > 0
    hoa_resolved = hoa_monthly_override if hoa_monthly_override is not None else (deal.get("hoa_monthly") or 0)
    financial_analysis = _build_financial_analysis(
        purchase_price=float(purchase_price) if purchase_price else 0,
        arv=float(arv) if arv else 0,
        pa_record=pa_record,
        down_payment=down_payment,
        monthly_income=monthly_income,
        rent_room_monthly=rent_room_monthly,
        other_monthly_debts=other_monthly_debts,
        loan_type=loan_type,
        hoa_monthly=float(hoa_resolved or 0),
        rehab_budget_known=rehab_budget if rehab_assessment_status == "user_override" else None,
    )
    max_offer_info = financial_analysis['max_profitable_offer']
    dti_eval = financial_analysis['live_in_scenario'].get('dti_evaluation')
    dti_label = dti_eval.get('verdict', 'N/A') if dti_eval else 'N/A (no income)'
    _log(f"  Financial: max_offer=${max_offer_info['max_offer']:,.0f} "
         f"(target {int(max_offer_info['target_margin_pct'])}% margin). "
         f"DTI verdict: {dti_label}.")

    elapsed = time.perf_counter() - t0

    # If court_records was required but couldn't run, override LLM verdict to
    # INSUFFICIENT_DATA (the LLM should also do this per its prompt, but we
    # enforce it deterministically here as backstop).
    final_verdict = verdict_data.get("verdict") or _fallback_verdict(margin_pct)
    if court_records_required_but_missing and final_verdict != "INSUFFICIENT_DATA":
        final_verdict = "INSUFFICIENT_DATA"
        verdict_data["reasoning"] = (
            f"Court records search no se pudo ejecutar para {county} County "
            f"(status={court_records_status}). El deal es distressed o tiene "
            f"case_number activo, por lo que se requiere verificacion judicial "
            f"antes de emitir veredicto responsable."
        )
        verdict_data["score"] = 0

    result_dict = {
        "verdict": final_verdict,
        "score": verdict_data.get("score") if verdict_data.get("score") is not None else _fallback_score(margin_pct),
        "reasoning": verdict_data.get("reasoning") or _fallback_reasoning(margin_pct, surviving_debt),
        "purchase_price": int(purchase_price),
        "arv": arv,
        "rehab_budget": rehab_budget,
        "rehab_assessment_status": rehab_assessment_status,  # honest label for the rehab figure
        "surviving_debt_total": surviving_debt,
        "effective_cost": effective_cost,
        "margin_pct": margin_pct,
        "red_flags": verdict_data.get("red_flags") or [],
        "liens_detected": liens_enriched,
        "plaintiff": plaintiff,
        "players": _ensure_players(verdict_data.get("players"), plaintiff, plaintiff_classified),
        # Public data gathered above (not generated by the LLM)
        "property_appraiser": pa_record,  # source of truth
        "owner_classification": owner_classification,
        "reo_signal": reo_signal,
        "financial_analysis": financial_analysis,
        "tax_assessed": tax_assessed_data,
        "price_validation": price_validation,
        "comps": comps_data,
        "comps_estimate": comps_estimate,
        "court_records_raw": court_records_data,
        "data_sources_used": data_sources,
        "data_sources_status": data_sources_status,
        "court_records_required": court_records_required,
        "court_records_status": court_records_status,
        "elapsed_seconds": round(elapsed, 1),
        "errors": errors,
    }

    # Persist the result to the property folder; the helper returns the
    # folder path, or a falsy value when nothing was saved.
    property_folder = _persist_to_property_folder(deal, result_dict)
    if property_folder:
        result_dict["property_folder"] = property_folder
        _log(f"  Saved to: {property_folder}")
    else:
        result_dict["property_folder"] = None

    return result_dict


def _ensure_players(players_from_llm: Optional[dict], plaintiff: Optional[str],
                     classified: dict) -> dict:
    """Return a players dict that always carries a usable ``implication``.

    Fields the LLM left empty or missing are filled deterministically:
    ``implication`` from ``_build_default_players``, ``plaintiff_name`` from
    ``plaintiff`` and ``plaintiff_type`` from ``classified["type"]``.

    Args:
        players_from_llm: The ``players`` value taken from the LLM verdict.
            Anything that is falsy or not a dict is replaced wholesale by the
            deterministic fallback.
        plaintiff: Plaintiff name, or None when none was identified.
        classified: Plaintiff classification dict (keys ``type`` / ``note``
            are read). None is tolerated, like in ``_build_default_players``.

    Returns:
        A new dict; the ``players_from_llm`` argument is never modified.
    """
    # A non-dict value (list, string, ...) cannot be patched field by field.
    if not players_from_llm or not isinstance(players_from_llm, dict):
        return _build_default_players(plaintiff, classified)

    # Work on a shallow copy so the caller's LLM response stays untouched.
    players = dict(players_from_llm)

    raw_implication = players.get("implication")
    # Only a non-blank string counts as a usable implication.
    has_implication = isinstance(raw_implication, str) and bool(raw_implication.strip())
    if not has_implication:
        players["implication"] = _build_default_players(plaintiff, classified)["implication"]

    if not players.get("plaintiff_name") and plaintiff:
        players["plaintiff_name"] = plaintiff

    classified_type = (classified or {}).get("type")
    if not players.get("plaintiff_type") and classified_type:
        players["plaintiff_type"] = classified_type
    return players


def _build_default_players(plaintiff: Optional[str], classified: dict) -> dict:
    """Build the deterministic players dict used when the LLM gave none.

    With a plaintiff, the dict echoes the name plus the ``type`` and ``note``
    found in ``classified`` (a generic pointer replaces a missing note).

    Without a plaintiff, the implication text lays out the three scenarios the
    user should consider: a legitimate MLS listing, a private (non-court)
    auction, or a case filed too recently to be indexed yet.
    """
    if plaintiff:
        plaintiff_type = classified.get("type") if classified else None
        note = classified.get("note") if classified else None
        return {
            "plaintiff_name": plaintiff,
            "plaintiff_type": plaintiff_type,
            "implication": note or "Ver clasificación de plaintiff.",
        }

    # No plaintiff found: explain the possible reasons instead of guessing.
    no_plaintiff_text = (
        "No se identificó plaintiff en court records del county. "
        "Tres posibilidades: "
        "(a) deal es MLS legítimo, no hay proceso judicial; "
        "(b) deal es auction privada (auction.com / hubzu / bank-owned), "
        "no court-supervised — investigar la fuente Zillow; "
        "(c) lis pendens recién filed y aún no indexado — re-correr en 48h. "
        "Si Zillow muestra 'Foreclosure Auction' considerá (b) como más probable."
    )
    return {
        "plaintiff_name": None,
        "plaintiff_type": None,
        "implication": no_plaintiff_text,
    }


def _lien_analyzer_annotate_sources(data_sources_status: dict) -> dict:
    """Attach an outcome label and an LLM-facing interpretation to each source status."""
    annotated = {}
    for source, status in data_sources_status.items():
        if status in ("NOT_IMPLEMENTED", "not_implemented_for_county"):
            outcome = "not_supported_for_county"
            interpretation = (
                "Source is PUBLICLY AVAILABLE for this county but our scraper "
                "does NOT YET cover it. Be honest: do NOT claim data was searched "
                "and not found. State explicitly that this source has not been "
                "checked because our adapter is pending. List as red_flag."
            )
        elif status in ("UNKNOWN", "no_comps_found", "no_results", "INCONCLUSIVE"):
            outcome = "ran_inconclusive"
            interpretation = (
                "Source DID run but produced no actionable result. Do NOT say "
                "'falta validación de X' — say 'X result inconclusive'."
            )
        elif status in ("not_run", "error", "missing_inputs", "not_found", "missing_zip"):
            outcome = "did_not_run"
            interpretation = "Source DID NOT execute. You may say data is missing for this source."
        else:
            # Any other status value is treated as a successful run.
            outcome = "ran_successfully"
            interpretation = (
                "Source executed and produced a result. Use the data from the "
                "corresponding section in your reasoning."
            )
        annotated[source] = {
            "status": status,
            "outcome": outcome,
            "interpretation": interpretation,
        }
    return annotated


def _lien_analyzer_pa_summary(pa_record: Optional[dict]) -> Optional[dict]:
    """Reduce a property-appraiser record to the fields sent to the LLM (None if no record)."""
    if not pa_record:
        return None

    copied_fields = (
        "owner_name", "parcel_id", "year_built", "effective_year_built",
        "bedrooms", "baths", "sqft_heated", "homestead_active",
        "homestead_amount", "owner_address_mismatch", "just_value_current",
        "assessed_value_current", "just_value_last", "assessed_value_last",
        "use_code", "use_description", "zoning", "subdivision", "roof_type",
        "exterior_wall", "most_recent_qualified_sale",
    )
    summary = {field: pa_record.get(field) for field in copied_fields}

    renovation = pa_record.get("renovation_signal") or {}
    summary["renovation_signal"] = {
        key: renovation.get(key)
        for key in ("is_flip_pattern", "evidence", "value_increase_pct", "months_between")
    }

    # Only the first three sales are forwarded; the count tells the LLM how
    # many exist in total.
    sales_history = pa_record.get("sales_history") or []
    summary["sales_history_count"] = len(sales_history)
    summary["sales_history_top3"] = sales_history[:3]
    summary["source"] = pa_record.get("source")
    return summary


def _call_lien_analyzer(
    *,
    deal_type: str,
    purchase_price: int,
    arv: int,
    rehab_budget: int,
    rehab_assessment_status: str = "not_assessed_in_prescreening",
    pa_record: Optional[dict] = None,
    liens_detected: list[dict],
    plaintiff_info: dict,
    case_number: Optional[str],
    court_records_required: bool,
    court_records_required_but_missing: bool,
    data_sources_status: dict,
    pre_computed: dict,
    tax_assessed_data: Optional[dict] = None,
    price_validation: Optional[dict] = None,
    comps_data: Optional[list] = None,
    comps_estimate: Optional[int] = None,
) -> dict:
    """Ask the Ollama ``LienPositionAnalyzer`` model for a verdict.

    All inputs are packed into one JSON payload embedded in the user prompt.
    The model is called with ``format="json"`` and its reply is parsed.

    Returns:
        The parsed JSON object on success. On any failure a dict with a single
        ``"_error"`` key is returned instead of raising: when the ``ollama``
        package is missing, when the call or JSON parsing raises, or when the
        reply parses to something other than a JSON object.
    """
    try:
        import ollama
    except ImportError:
        return {"_error": "ollama package not available"}

    # Annotate data_sources_status with semantic flags so the LLM doesn't
    # mistake "ran without findings" for "missing source" or "not implemented".
    annotated_sources = _lien_analyzer_annotate_sources(data_sources_status)

    lien_fields = ("type", "amount", "creditor", "notes", "survives",
                   "survival_reason", "amount_post_cap")
    tax_fields = (
        "assessed_value", "market_value", "just_value", "year_built", "sqft",
        "beds", "baths", "owner_name", "source",
        "mailing_address", "situs_address", "neighborhood", "use_code",
        "legal_description", "homestead_active",
        "taxes_paid_last_year", "tax_year_last",
        "sales_history",  # list of {date, type, price, book_page_or_cin}
    )
    price_fields = ("status", "market_estimate", "signed_max_discrepancy_pct",
                    "sources_used", "confidence")
    comp_fields = ("sold_price", "sqft", "price_per_sqft", "address", "sold_date_text")

    user_payload = {
        "deal_type": deal_type,
        "purchase_price": purchase_price,
        "arv": arv,
        "rehab_budget": rehab_budget,
        # 'not_assessed_in_prescreening' | 'user_override'
        "rehab_assessment_status": rehab_assessment_status,
        # Property-appraiser data is the source of truth for the LLM.
        "property_appraiser": _lien_analyzer_pa_summary(pa_record),
        "liens_detected": [
            {key: value for key, value in lien.items() if key in lien_fields}
            for lien in liens_detected
        ],
        "plaintiff_info": plaintiff_info,
        "case_number": case_number,
        "court_records_required": court_records_required,
        "court_records_required_but_missing": court_records_required_but_missing,
        "data_sources_status": data_sources_status,
        "data_sources_annotated": annotated_sources,
        "pre_computed": pre_computed,
        # Rich public data so the LLM can reason from facts. For counties with
        # a full PA adapter (Broward) this carries extended fields
        # (mailing_address, sales_history, taxes paid, homestead_active), which
        # lets the LLM spot things like REO direct outreach, owner-occupied vs
        # absentee, or undervalued purchases via sales_history vs market.
        "tax_assessed": (
            {key: tax_assessed_data.get(key) for key in tax_fields}
            if tax_assessed_data else None
        ),
        "price_validation": (
            {key: price_validation.get(key) for key in price_fields}
            if price_validation else None
        ),
        # Only the first three comps are sent.
        "comps_sample": (
            [{key: comp.get(key) for key in comp_fields} for comp in comps_data[:3]]
            if comps_data else None
        ),
        "comps_estimate": comps_estimate,
    }

    # default=str keeps json.dumps from failing on non-JSON types by
    # stringifying them.
    prompt = (
        "Analizá la siguiente situación de liens y devolvé el veredicto en JSON estricto.\n\n"
        f"```json\n{json.dumps(user_payload, indent=2, default=str)}\n```\n"
    )

    try:
        resp = ollama.chat(
            model="LienPositionAnalyzer",
            messages=[{"role": "user", "content": prompt}],
            format="json",
            options={"temperature": 0.2, "num_ctx": 8192},
        )
        data = json.loads(resp["message"]["content"])
    except Exception as e:
        return {"_error": f"LienPositionAnalyzer error: {type(e).__name__}: {e}"}

    # Valid JSON is not necessarily an object; a list or scalar would break
    # the declared dict contract, so report it as an error instead.
    if not isinstance(data, dict):
        return {
            "_error": (
                "LienPositionAnalyzer error: expected a JSON object, "
                f"got {type(data).__name__}"
            )
        }
    return data


def _fallback_verdict(margin_pct: float) -> str:
    """Derive a verdict from the pre-computed margin alone (used when the agent fails).

    Mapping: margin >= 30 -> "GO"; 15 <= margin < 30 -> "MAYBE";
    anything else -> "NO-GO".

    Args:
        margin_pct: Pre-computed margin, in percent.

    Returns:
        One of "GO", "MAYBE" or "NO-GO".
    """
    # Thresholds are checked from the top down with ``>=`` so that a NaN margin
    # (for which every comparison is False) lands on the conservative "NO-GO"
    # instead of falling through to "GO".
    if margin_pct >= 30:
        return "GO"
    if margin_pct >= 15:
        return "MAYBE"
    return "NO-GO"


def _fallback_score(margin_pct: float) -> int:
    """Map the pre-computed margin to a coarse score (used when the LLM gave none).

    Mapping: margin >= 35 -> 9; >= 25 -> 7; >= 15 -> 5; >= 0 -> 3;
    negative -> 1.

    Args:
        margin_pct: Pre-computed margin, in percent.

    Returns:
        One of 1, 3, 5, 7 or 9.
    """
    # Checked from the best band downwards with ``>=`` so that a NaN margin
    # (every comparison False) gets the lowest score rather than the highest.
    if margin_pct >= 35:
        return 9
    if margin_pct >= 25:
        return 7
    if margin_pct >= 15:
        return 5
    if margin_pct >= 0:
        return 3
    return 1


def _fallback_reasoning(margin_pct: float, surviving_debt: int) -> str:
    """Compose the stock reasoning text used when the LLM supplied none.

    The margin is rendered with one decimal and the surviving debt with
    thousands separators.
    """
    template = (
        "Margen calculado {margin:.1f}% sobre ARV. "
        "Surviving debt estimado: ${debt:,}. "
        "Análisis profundo recomendado (LienPositionAnalyzer no disponible)."
    )
    return template.format(margin=margin_pct, debt=surviving_debt)


# ════════════════════════════════════════════════════════════════════════════
# Financial Analysis Builder (bug fix 2026-05-15)
# ════════════════════════════════════════════════════════════════════════════

def _build_financial_analysis(
    *,
    purchase_price: float,
    arv: float,
    pa_record: Optional[dict] = None,
    down_payment: Optional[float] = None,
    monthly_income: Optional[float] = None,
    rent_room_monthly: Optional[float] = None,
    other_monthly_debts: Optional[float] = None,
    loan_type: str = "conventional_oo",
    hoa_monthly: float = 0,
    rehab_budget_known: Optional[float] = None,
) -> dict:
    """Assemble the ``financial_analysis`` section of the pre-screening output.

    Delegates to three ``finance_calculator`` helpers and bundles the results:
    - ``calculate_max_profitable_offer`` (investor max offer)
    - ``calculate_payment_table`` (payments at several price points)
    - ``calculate_live_in_scenario`` (owner-occupant view; receives income,
      debts and room-rent figures)

    Estimates made here:
    - Down payment defaults to 5% of ``purchase_price`` when missing or <= 0.
    - Annual tax is 1.3% of the PA record's ``just_value_current`` (0 without it).
    - Annual insurance is the premium of the first ``FL_INSURANCE_TIERS`` entry
      whose cap exceeds the just value (0 without a PA record or a match).
    - Rehab is ``rehab_budget_known`` when given; otherwise 0 if the PA
      renovation signal reports a flip in progress, else 5% of ``arv``.

    Returns:
        Dict with keys ``inputs``, ``max_profitable_offer``, ``payment_table``,
        ``live_in_scenario`` and ``recommendation``.
    """
    from finance_calculator import (
        calculate_live_in_scenario,
        calculate_max_profitable_offer,
        calculate_payment_table,
    )

    # ─── Input defaults ────────────────────────────────────────────────────
    if down_payment is None or down_payment <= 0:
        # 5% of asking (~conventional owner-occupant minimum).
        down_payment = purchase_price * 0.05 if purchase_price > 0 else 0
    rent_room_monthly = 0 if rent_room_monthly is None else rent_room_monthly
    other_monthly_debts = 0 if other_monthly_debts is None else other_monthly_debts

    # ─── Tax / insurance estimates from the property appraiser ─────────────
    tax_annual = 0.0
    insurance_annual = 0.0
    if pa_record:
        just_value = pa_record.get("just_value_current") or 0
        if just_value:
            # Estimate only: ~1.3% of just value (FL average).
            tax_annual = just_value * 0.013
        from finance_calculator import FL_INSURANCE_TIERS
        # NOTE(review): when the record has no just value this lookup runs with
        # 0, so it can still pick a premium while tax stays 0 — confirm intent.
        insurance_annual = next(
            (premium for cap, premium in FL_INSURANCE_TIERS if just_value < cap),
            insurance_annual,
        )

    # ─── Rehab estimate ────────────────────────────────────────────────────
    # Pre-screening has no photo inspection, so without a user-supplied budget
    # this is only a placeholder.
    if rehab_budget_known is not None:
        rehab_estimate = rehab_budget_known
    else:
        renovation = (pa_record.get("renovation_signal") or {}) if pa_record else {}
        # NOTE(review): elsewhere in this module the renovation signal is read
        # via "is_flip_pattern"; confirm "is_flip_in_progress" is also populated.
        if renovation.get("is_flip_in_progress"):
            rehab_estimate = 0  # seller already renovated
        elif arv:
            rehab_estimate = arv * 0.05  # conservative 5% placeholder
        else:
            rehab_estimate = 0

    # ─── Max profitable offer (investor view) ──────────────────────────────
    # Falls back to the asking price when no ARV is available.
    max_offer_data = calculate_max_profitable_offer(
        arv=arv or purchase_price,
        rehab_estimate=rehab_estimate,
    )
    max_offer = max_offer_data["max_offer"]

    # ─── Multi-price payment table ─────────────────────────────────────────
    payment_table = calculate_payment_table(
        asking_price=purchase_price,
        max_offer=max_offer,
        down_payment=down_payment,
        tax_annual=tax_annual,
        insurance_annual=insurance_annual,
        hoa_monthly=hoa_monthly,
    )

    # ─── Live-in scenario ──────────────────────────────────────────────────
    live_in = calculate_live_in_scenario(
        purchase_price=purchase_price,
        down_payment=down_payment,
        loan_type=loan_type,
        years=30,
        tax_annual=tax_annual,
        insurance_annual=insurance_annual,
        hoa_monthly=hoa_monthly,
        rent_room_monthly=rent_room_monthly,
        monthly_income=monthly_income or 0,
        other_monthly_debts=other_monthly_debts,
    )

    # ─── Recommendation text ───────────────────────────────────────────────
    if purchase_price > 0 and max_offer > 0:
        discount_pct = (1 - max_offer / purchase_price) * 100
        midpoint = (purchase_price + max_offer) / 2
        recommendation = (
            f"Oferta recomendada: ${max_offer:,.0f} ({discount_pct:.0f}% bajo asking). "
            f"Midpoint negociacion: ${midpoint:,.0f}. Asking: ${purchase_price:,.0f}. "
            f"Justificacion: {max_offer_data['justification']}"
        )
    else:
        recommendation = "Sin asking price valido; recomendacion no calculable."

    # "inputs" echoes the values actually used, i.e. after defaults were applied.
    inputs_used = {
        "asking_price": purchase_price,
        "arv": arv,
        "down_payment": down_payment,
        "monthly_income": monthly_income,
        "rent_room_monthly": rent_room_monthly,
        "other_monthly_debts": other_monthly_debts,
        "loan_type": loan_type,
        "hoa_monthly": hoa_monthly,
        "rehab_estimate_used": rehab_estimate,
        "tax_annual_from_pa": tax_annual,
        "insurance_annual_estimated": insurance_annual,
    }
    return {
        "inputs": inputs_used,
        "max_profitable_offer": max_offer_data,
        "payment_table": payment_table,
        "live_in_scenario": live_in,
        "recommendation": recommendation,
    }


# ════════════════════════════════════════════════════════════════════════════
# Persistence to property folder
# ════════════════════════════════════════════════════════════════════════════

def _persist_to_property_folder(deal: dict, result: dict) -> Optional[str]:
    """Save a pre-screening result into the deal's property folder (best effort).

    The folder comes from ``properties_store.ensure_property_folder(deal)``.
    Each run writes ``due_diligence/pre_screening_<UTC stamp>.json`` and then
    updates the folder meta with ``last_dd_run_at``, ``last_dd_kind='pre_dd'``,
    ``last_verdict`` and ``last_score``.

    Args:
        deal: Deal dict handed to ``ensure_property_folder``.
        result: Pre-screening result; ``verdict`` and ``score`` are read for
            the meta update and the whole dict is saved as JSON.

    Returns:
        The folder path as a string, or None if anything failed. Failures are
        logged as warnings and never raised, so persistence problems cannot
        break pre-screening.
    """
    try:
        from datetime import datetime, timezone
        from properties_store import ensure_property_folder, save_json, write_meta

        folder = ensure_property_folder(deal)
        # One timestamp for both the file name and the meta entry, so the two
        # always refer to the same instant.
        run_at = datetime.now(timezone.utc)
        stamp = run_at.strftime("%Y%m%d_%H%M%S")
        report_path = folder / "due_diligence" / f"pre_screening_{stamp}.json"
        save_json(report_path, result)
        write_meta(
            folder,
            last_dd_run_at=run_at.isoformat(),
            last_dd_kind="pre_dd",
            last_verdict=result.get("verdict"),
            last_score=result.get("score"),
        )
        return str(folder)
    except Exception as exc:
        # Deliberately broad: persistence is optional. Log instead of
        # swallowing silently so a broken store is visible.
        import logging
        logging.getLogger(__name__).warning(
            "Pre-screening persistence failed: %s: %s", type(exc).__name__, exc
        )
        return None