AR-House/data_fetchers/property_value.py

"""Property value fetcher para Wave 2 (ValueEstimator).

Objetivo: estimar valor real de una propiedad combinando:
1. Tax assessed value (gratis, county-specific scraping)
2. Comparables recently sold (Firecrawl, MAX 5 — OPT-IN para no quemar credits)
3. Deductions automaticas por edad del inmueble (AC, roof, plumbing, panel)

OUTPUT consolidado:
{
  "listing_price": int,
  "tax_assessed_value": int | None,
  "comps_used": [{address, sold_price, sold_date, sqft, $/sqft}, ...],
  "estimated_value": {"low": int, "mid": int, "high": int, "confidence": str},
  "price_per_sqft_comps_avg": float | None,
  "price_per_sqft_subject": float,
  "overpriced_pct": float | None,
  "inflation_score": float,  # 0-10
  "deductions": {"ac": int, "roof": int, "plumbing": int, "panel": int, "total": int},
  "market_trend": {"direction": str, "evidence": str},
  "sources_used": [...],
  "fetch_errors": [...],
}

FAIL-SOFT:
- Sin Firecrawl key o ENABLE_FIRECRAWL_COMPS=false → comps_used=[], confidence baja
- Sin tax assessed → tax_assessed_value=None, fallback a comps
- Sin nada → estimacion basada SOLO en deductions vs listing
"""

from __future__ import annotations

import os
import re
from datetime import datetime, timezone
from typing import Optional

from .base import FetcherError, USER_AGENT, DEFAULT_TIMEOUT


# ─── Deductions por edad del inmueble (Florida real estate norms) ──────────
DEDUCTION_AC = 6_000              # AC central viejo (<2010) si no hay evidencia de uno nuevo
DEDUCTION_ROOF = 10_000           # Roof shingle viejo (<2005)
DEDUCTION_PLUMBING_POLYBUTYLENE = 12_000  # Polybutylene plumbing risk (1978-1995 FL)
DEDUCTION_ELECTRICAL_PANEL = 5_000        # Federal Pacific / Zinsco panels (<1990)

# ─── Keywords para detectar items renovados (suprimir deduccion correspondiente) ──
# Bug fix 2026-05-15: el sistema antes solo chequeaba "new ac"/"ac nuevo".
# Ahora detecta variantes reales que aparecen en listings: "BRAND NEW ROOF",
# "Updated/Remodeled", "Fully updated throughout", "AC replaced 2023", etc.
NEW_ITEM_KEYWORDS = {
    "ac": [
        "new ac", "new a/c", "new hvac", "new a.c.", "new air conditioning",
        "ac replaced", "a/c replaced", "hvac replaced", "ac unit replaced",
        "new air handler", "ac nuevo", "a/c nuevo", "hvac nuevo",
        "ac (20", "ac 20", "a/c (20",  # "AC (2023)" or "AC 2023"
        "newer ac", "newer a/c", "newer hvac",
        "recently replaced ac", "recently replaced a/c", "recently replaced hvac",
    ],
    "roof": [
        "new roof", "brand new roof", "brand-new roof", "newer roof",
        "roof replaced", "roof recently replaced", "recent roof",
        "roof 20", "roof (20", "new shingles", "metal roof installed",
        "roof installed 20", "roof nuevo", "techo nuevo",
        "replaced roof", "roof was replaced", "roof replacement",
    ],
    "plumbing": [
        "re-piped", "repiped", "re piped", "pex plumbing", "new plumbing",
        "plumbing replaced", "plumbing nuevo", "fully repiped",
        "copper plumbing", "plumbing updated", "new pipes",
    ],
    "panel": [
        "new panel", "panel upgraded", "panel replaced", "new electrical",
        "electrical updated", "200 amp", "new wiring", "rewired",
        "panel nuevo", "panel electrico nuevo", "upgraded electrical",
    ],
}

# Keywords globales que indican "renovada completa" → CERO deducciones
RENOVATED_GLOBAL_KEYWORDS = [
    "updated/remodeled",  # Zillow explicit condition tag
    "fully updated", "fully renovated", "completely renovated", "completely remodeled",
    "move-in ready", "move in ready", "turnkey", "turn key", "turn-key",
    "totally renovated", "totally updated",
    "renovacion completa", "completamente renovada", "lista para mudarse",
    "newly renovated", "newly remodeled",
    "fully remodeled",
]

# Keywords de condition_status que indican propiedad renovada
RENOVATED_CONDITION_VALUES = {
    "updated/remodeled", "remodeled", "renovated", "updated",
    "new construction", "newly built",
}


def _matches_any_keyword(text: str, keywords: list[str]) -> Optional[str]:
    """Returns first matched keyword (case-insensitive substring), or None."""
    if not text:
        return None
    text_lower = text.lower()
    for kw in keywords:
        if kw.lower() in text_lower:
            return kw
    return None


# ─── Comps estimation ──────────────────────────────────────────────────────
COMPS_MAX_COUNT = 5
COMPS_LOOKBACK_DAYS = 180
COMPS_SQFT_TOLERANCE_PCT = 0.15
PRICE_LOW_PCT = 0.92   # estimated_value['low']  = mid * 0.92
PRICE_HIGH_PCT = 1.08  # estimated_value['high'] = mid * 1.08


def calculate_age_deductions(
    year_built: int,
    photo_findings_text: str = "",
    listing_description: str = "",
    condition_status: str = "",
    features_special: Optional[list] = None,
) -> dict:
    """Deducciones automaticas segun edad de la propiedad.

    BUG FIX 2026-05-15: antes solo chequeaba keywords minimos en photo_findings_text.
    Ahora respeta condition_status, listing_description, y features_special tags.

    Args:
        year_built: año de construccion. Si <= 0 → ZERO deductions.
        photo_findings_text: output del PhotoInspector (puede mencionar AC nuevo, etc.)
        listing_description: full description del listing Zillow/Realtor
        condition_status: Zillow "condition" tag (e.g. "Updated/Remodeled", "Original")
        features_special: array de tags del "What's special" Zillow box
                          (e.g. ["BRAND NEW ROOF", "Fresh paint", "NEW AC"])

    Returns:
        {ac, roof, plumbing, panel, total, _skipped_global: bool,
         _suppressed_items: [str], _reasons: {item: kw_matched}}

        Si condition_status indica renovada O description tiene keywords globales
        → TODO 0, _skipped_global=True.

        Per-item: si features_special O description menciona NEW ROOF/AC/etc,
        suprime esa deduccion especifica.
    """
    deductions = {"ac": 0, "roof": 0, "plumbing": 0, "panel": 0}
    suppressed: list[str] = []
    reasons: dict[str, str] = {}

    # Combinar todos los textos en uno solo para keyword matching
    combined_text = " ".join([
        photo_findings_text or "",
        listing_description or "",
        " ".join(features_special or []),
    ])

    # 1. CHECK GLOBAL: si condition status es renovada → CERO deducciones
    cs_lower = (condition_status or "").lower().strip()
    if cs_lower in RENOVATED_CONDITION_VALUES:
        return {
            **deductions, "total": 0,
            "_skipped_global": True,
            "_skip_reason": f"condition_status='{condition_status}' (Zillow tag)",
            "_suppressed_items": list(deductions.keys()),
            "_reasons": {k: f"condition={condition_status}" for k in deductions.keys()},
        }

    # 2. CHECK GLOBAL: si description o features mencionan "fully updated"/"move-in ready"
    global_kw = _matches_any_keyword(combined_text, RENOVATED_GLOBAL_KEYWORDS)
    if global_kw:
        return {
            **deductions, "total": 0,
            "_skipped_global": True,
            "_skip_reason": f"keyword '{global_kw}' detected in listing",
            "_suppressed_items": list(deductions.keys()),
            "_reasons": {k: f"keyword:{global_kw}" for k in deductions.keys()},
        }

    # 3. PER-ITEM: aplicar deduccion solo si edad gatilla Y no hay keyword item-specific
    if not year_built or year_built <= 0:
        deductions["total"] = 0
        deductions["_skipped_global"] = False
        deductions["_suppressed_items"] = []
        deductions["_reasons"] = {}
        return deductions

    # AC: edad <2010 y NO hay keyword "new ac"
    if year_built < 2010:
        ac_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["ac"])
        if ac_kw:
            suppressed.append("ac")
            reasons["ac"] = f"keyword:{ac_kw}"
        else:
            deductions["ac"] = DEDUCTION_AC

    # Roof: edad <2005 y NO hay keyword "new roof"
    if year_built < 2005:
        roof_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["roof"])
        if roof_kw:
            suppressed.append("roof")
            reasons["roof"] = f"keyword:{roof_kw}"
        else:
            deductions["roof"] = DEDUCTION_ROOF

    # Plumbing polybutylene: edad 1978-1995 y NO hay keyword "repiped"
    if 1978 <= year_built <= 1995:
        pl_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["plumbing"])
        if pl_kw:
            suppressed.append("plumbing")
            reasons["plumbing"] = f"keyword:{pl_kw}"
        else:
            deductions["plumbing"] = DEDUCTION_PLUMBING_POLYBUTYLENE

    # Electrical panel: edad <1990 y NO hay keyword "new panel"
    if year_built < 1990:
        panel_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["panel"])
        if panel_kw:
            suppressed.append("panel")
            reasons["panel"] = f"keyword:{panel_kw}"
        else:
            deductions["panel"] = DEDUCTION_ELECTRICAL_PANEL

    deductions["total"] = sum(v for k, v in deductions.items() if k in ("ac", "roof", "plumbing", "panel"))
    deductions["_skipped_global"] = False
    deductions["_suppressed_items"] = suppressed
    deductions["_reasons"] = reasons
    return deductions


# ═══════════════════════════════════════════════════════════════════════════
# Tax Assessed Value (county-specific scrapers)
# ═══════════════════════════════════════════════════════════════════════════

def fetch_tax_assessed_miami_dade(address: str) -> Optional[dict]:
    """Stub para scraper Miami-Dade property appraiser.

    TODO Wave 2 follow-up: implementar Playwright scraping de:
        https://www.miamidade.gov/pa/property_search.asp
    Steps:
      1. Search by address
      2. Parse results, click property card
      3. Extract assessed_value, market_value, sale_history
    Tiempo estimado: ~30s por lookup. Cache 30 dias.

    Por ahora retorna None — el agente trabaja con comps + deductions sin tax assessed.
    """
    return None


# Set explicito de counties que TIENEN scraper real (no stub).
# Hoy: Broward funciona via pa_broward.py (full PA record extraction).
# Pendiente: Miami-Dade, Palm Beach, Orange, Hillsborough, Pinellas... (custom adapters)
#           qPublic (~30 rurales) — bloqueado por Cloudflare
_TAX_ASSESSED_IMPLEMENTED_COUNTIES: set[str] = {
    "broward",
}


def is_tax_assessed_supported(county_name: Optional[str], state: Optional[str]) -> bool:
    """True si tenemos scraper real para este county. False si es stub o no esta.

    El orchestrator usa esto para distinguir 'no implementado' (decisión nuestra)
    de 'not found' (buscamos y no estaba). NO mentir al usuario.
    """
    if not county_name or state != "FL":
        return False
    cname = county_name.lower().replace(" county", "").strip()
    return cname in _TAX_ASSESSED_IMPLEMENTED_COUNTIES


def fetch_tax_assessed(
    address: str,
    county_name: Optional[str],
    state: Optional[str],
    parcel_id: Optional[str] = None,
) -> Optional[dict]:
    """Router por condado para Property Appraiser data.

    Args:
        address: street address (used by counties whose adapter supports address search)
        county_name: e.g. "Broward", "Miami-Dade"
        state: must be "FL" today
        parcel_id: county-specific folio number (required for Broward; preferred
                   for all counties since address matching is often fuzzy on PA sites)

    Returns:
        Rich dict with legacy keys (assessed_value, market_value, year_built, sqft,
        owner_name, source) plus extended fields when adapter supports them
        (sales_history, mailing_address, tax_breakdown, photo_url, etc.).
        None if county not implemented OR adapter returned empty result.

    Use is_tax_assessed_supported() to distinguish "not implemented" vs "not found".
    """
    if not is_tax_assessed_supported(county_name, state):
        return None
    cname = (county_name or "").lower().replace(" county", "").strip()

    if cname == "broward":
        if not parcel_id:
            # Broward needs folio — we can't do reliable address search yet
            return None
        return _fetch_broward(parcel_id)

    if "miami-dade" in cname or "miami dade" in cname:
        return fetch_tax_assessed_miami_dade(address)

    # Defensive: not reachable if is_tax_assessed_supported is in sync
    return None


def _fetch_broward(parcel_id: str) -> Optional[dict]:
    """Broward adapter: pa_broward.py rich record → legacy contract + extensions."""
    try:
        from data_fetchers.pa_broward import fetch_broward_pa_record
    except ImportError:
        return None

    rec = fetch_broward_pa_record(parcel_id)
    if not rec or rec.get("errors") and not rec.get("just_value_current"):
        # Broward adapter failed AND no fallback data → return None
        return None

    cy = rec.get("current_year", {})
    ly = rec.get("last_year", {})

    # Concatenate owner_name + owner_name_2 if continuation exists (e.g. "BANK OF AMERICA NA TRSTEE" + "% CORP REAL ESTATE ASSMT")
    owner_full = rec.get("owner_name", "") or ""
    if rec.get("owner_name_2"):
        owner_full = f"{owner_full} {rec['owner_name_2']}".strip()

    # Parse beds/baths from "1 / 3 / 2.50" format (units/beds/baths)
    beds = baths = None
    ubb = (rec.get("units_beds_baths") or "").split("/")
    if len(ubb) >= 3:
        try:
            beds_raw = ubb[1].strip()
            beds = int(beds_raw) if beds_raw and beds_raw.replace(".", "").isdigit() else None
        except (ValueError, IndexError):
            pass
        try:
            baths_raw = ubb[2].strip()
            baths = float(baths_raw) if baths_raw and baths_raw.replace(".", "").isdigit() else None
        except (ValueError, IndexError):
            pass

    # ─── Legacy contract (back-compat with existing orchestrator/LLM payload) ──
    return {
        # Required legacy keys
        "assessed_value": cy.get("assessed_value") or ly.get("assessed_value"),
        "market_value": cy.get("just_value") or ly.get("just_value"),
        "just_value": cy.get("just_value") or ly.get("just_value"),
        "year_built": rec.get("year_built"),
        "sqft": rec.get("under_air_sqft") or rec.get("adj_bldg_sqft"),
        "beds": beds,
        "baths": baths,
        "owner_name": owner_full,
        "source": "bcpa.net",
        # ─── Extended fields (Property Snapshot Report inputs) ────────────────
        "folio_number": rec.get("folio_number"),
        "mailing_address": rec.get("mailing_address"),
        "situs_address": rec.get("situs_address"),
        "neighborhood": rec.get("neighborhood"),
        "use_code": rec.get("use_code"),
        "millage_code": rec.get("millage_code"),
        "legal_description": rec.get("legal_description"),
        "adj_bldg_sqft": rec.get("adj_bldg_sqft"),
        "under_air_sqft": rec.get("under_air_sqft"),
        "effective_year": rec.get("effective_year"),
        "homestead_active": rec.get("homestead_active", False),
        "taxes_paid_last_year": ly.get("taxes_paid"),
        "tax_year_last": ly.get("tax_year"),
        "tax_year_current": cy.get("tax_year"),
        "current_year_values": cy,
        "last_year_values": ly,
        "two_years_ago_values": rec.get("two_years_ago", {}),
        "tax_breakdown": rec.get("tax_breakdown", {}),
        "sales_history": rec.get("sales_history", []),
        "photo_url": rec.get("photo_url"),
        "source_url": rec.get("source_url"),
        "source_api_url": rec.get("source_api_url"),
        "fetched_at": rec.get("fetched_at"),
    }


# ═══════════════════════════════════════════════════════════════════════════
# Firecrawl comps (OPT-IN para no quemar credits)
# ═══════════════════════════════════════════════════════════════════════════

def _firecrawl_enabled() -> bool:
    """Check env flag + key presence."""
    flag = os.getenv("ENABLE_FIRECRAWL_COMPS", "false").lower() == "true"
    has_key = bool(os.getenv("FIRECRAWL_API_KEY", "").strip())
    return flag and has_key


def fetch_zillow_comps(
    zip_code: str,
    beds: int,
    baths: float,
    sqft: int,
    max_count: int = COMPS_MAX_COUNT,
) -> tuple[list[dict], list[str]]:
    """Fetch recently sold comps via Firecrawl scrape de Zillow.

    Returns (comps_list, errors). Comps list capped at max_count.
    Cada comp: {address, sold_price, sold_date_text, sqft, price_per_sqft}

    OPT-IN: requiere ENABLE_FIRECRAWL_COMPS=true en .env.
    """
    errors: list[str] = []

    if not _firecrawl_enabled():
        errors.append(
            "Firecrawl comps deshabilitado. Setear ENABLE_FIRECRAWL_COMPS=true en .env para activar."
        )
        return [], errors

    try:
        from firecrawl import FirecrawlApp
    except ImportError as e:
        errors.append(f"firecrawl-py no importable: {e}")
        return [], errors

    api_key = os.getenv("FIRECRAWL_API_KEY", "").strip()
    if not api_key:
        errors.append("FIRECRAWL_API_KEY ausente en .env")
        return [], errors

    # Zillow recently sold URL para ZIP
    url = f"https://www.zillow.com/homes/recently_sold/{zip_code}_rb/"

    try:
        app = FirecrawlApp(api_key=api_key)
        # Firecrawl SDK v2+: .scrape() (renamed from legacy .scrape_url()).
        # Returns Document object with .markdown attribute on success.
        result = app.scrape(url, formats=["markdown"])
        if not result or not hasattr(result, "markdown"):
            errors.append("Firecrawl devolvio resultado vacio")
            return [], errors
        md = result.markdown or ""
    except Exception as e:
        errors.append(f"Firecrawl scrape error: {e}")
        return [], errors

    # Parser best-effort del markdown de Zillow.
    # Buscar bloques con: $price + sqft + address + sold date
    # Patrones tipicos en markdown de Zillow recently sold:
    #   "$485,000" ... "1,450 sqft" ... "123 Main St" ... "Sold X/Y/Z"
    comps = _parse_zillow_markdown(md, beds=beds, baths=baths, sqft_target=sqft)
    if not comps:
        errors.append("Firecrawl OK pero parser no extrajo comps (Zillow cambio formato?)")

    return comps[:max_count], errors


def _parse_zillow_markdown(md: str, beds: int, baths: float, sqft_target: int) -> list[dict]:
    """Best-effort parser de Zillow markdown.

    Extrae bloques con price + sqft + address. Tolera variaciones.
    """
    comps = []
    # Buscar todos los matches de precio + sqft cercanos
    # Pattern: $XXX,XXX (con o sin centavos) seguido en proximidad de "X,XXX sqft" o "X bd"
    price_pattern = re.compile(r"\$([\d,]{3,9})", re.IGNORECASE)
    sqft_pattern = re.compile(r"([\d,]{3,5})\s*sq(?:\.|uare)?\s*ft", re.IGNORECASE)
    sold_pattern = re.compile(r"(sold|vendido)[\s:]+([0-9/.-]+)", re.IGNORECASE)
    bed_pattern = re.compile(r"([\d.]+)\s*(?:bd|bed)", re.IGNORECASE)

    # Segmentar md en bloques de ~500 chars que probablemente contengan 1 listing
    segments = re.split(r"\n\n+|---+", md)
    for seg in segments:
        if len(seg) < 50 or len(seg) > 2000:
            continue
        prices = price_pattern.findall(seg)
        sqfts = sqft_pattern.findall(seg)
        if not prices or not sqfts:
            continue
        try:
            price = int(prices[0].replace(",", ""))
            sqft = int(sqfts[0].replace(",", ""))
        except (ValueError, IndexError):
            continue
        # Filtro: precio razonable para single home
        if price < 30_000 or price > 5_000_000:
            continue
        if sqft < 400 or sqft > 8_000:
            continue
        # Filtro: sqft cerca del subject (+/- COMPS_SQFT_TOLERANCE_PCT)
        if sqft_target > 0:
            ratio = sqft / sqft_target
            if ratio < (1 - COMPS_SQFT_TOLERANCE_PCT) or ratio > (1 + COMPS_SQFT_TOLERANCE_PCT):
                continue

        sold_match = sold_pattern.search(seg)
        sold_date = sold_match.group(2) if sold_match else "?"

        bed_match = bed_pattern.search(seg)
        comp_beds = bed_match.group(1) if bed_match else "?"

        # Address best-effort: primera linea o primer link
        addr_match = re.search(r"\[([^\]]+(?:St|Ave|Rd|Dr|Ln|Way|Blvd|Ct|Ter|Pl)[^\]]*)\]", seg, re.IGNORECASE)
        address = addr_match.group(1) if addr_match else "(direccion no parseada)"

        comps.append({
            "address": address,
            "sold_price": price,
            "sold_date": sold_date,
            "sqft": sqft,
            "beds_text": comp_beds,
            "price_per_sqft": round(price / sqft, 2) if sqft > 0 else 0,
        })

    return comps


# ═══════════════════════════════════════════════════════════════════════════
# Estimacion de valor y inflation score
# ═══════════════════════════════════════════════════════════════════════════

def estimate_value_from_comps(comps: list[dict], subject_sqft: int) -> tuple[Optional[int], Optional[float]]:
    """Calcula valor estimado mid + $/sqft promedio de comps."""
    if not comps or subject_sqft <= 0:
        return None, None
    prices_per_sqft = [c["price_per_sqft"] for c in comps if c.get("price_per_sqft", 0) > 0]
    if not prices_per_sqft:
        return None, None
    avg_ppsqft = sum(prices_per_sqft) / len(prices_per_sqft)
    estimated_mid = int(avg_ppsqft * subject_sqft)
    return estimated_mid, round(avg_ppsqft, 2)


def calculate_inflation_score(listing_price: float, estimated_mid: float) -> float:
    """Score 0-10 de cuanto el listing esta sobre el valor estimado.

    0 = listing al valor o por debajo (subvaluado)
    5 = listing 10% sobre valor
    10 = listing 30%+ sobre valor (gravemente inflado)
    """
    if estimated_mid <= 0:
        return 5.0  # default medium
    overpriced = (listing_price - estimated_mid) / estimated_mid
    if overpriced <= 0:
        return 0.0
    if overpriced >= 0.30:
        return 10.0
    return round(overpriced / 0.30 * 10, 1)


# ═══════════════════════════════════════════════════════════════════════════
# API publica
# ═══════════════════════════════════════════════════════════════════════════

def fetch_property_value(
    *,
    address: str,
    listing_price: float,
    sqft: int,
    beds: int,
    baths: float,
    year_built: int,
    zip_code: Optional[str] = None,
    county_name: Optional[str] = None,
    state: Optional[str] = None,
    photo_findings_text: str = "",
    listing_description: str = "",
    condition_status: str = "",
    features_special: Optional[list] = None,
    include_firecrawl_comps: Optional[bool] = None,
) -> dict:
    """Entry point. Combina tax assessed + comps + deductions en un dict consolidado.

    Bug fix 2026-05-15: ahora acepta listing_description, condition_status,
    features_special para que calculate_age_deductions pueda detectar listings
    renovados (e.g. condition='Updated/Remodeled' o description='Fully updated
    throughout, BRAND NEW ROOF, NEW AC') y suprimir deducciones falsas.

    include_firecrawl_comps:
        None  → usa el flag ENABLE_FIRECRAWL_COMPS de .env (default false)
        True  → fuerza llamada a Firecrawl (consume credits)
        False → omite Firecrawl
    """
    fetched_at = datetime.now(timezone.utc).isoformat()
    sources_used: list[str] = []
    errors: list[str] = []

    # 1. Deductions por edad (siempre, gratis) — ahora respeta condition + keywords
    deductions = calculate_age_deductions(
        year_built=year_built,
        photo_findings_text=photo_findings_text,
        listing_description=listing_description,
        condition_status=condition_status,
        features_special=features_special,
    )
    if deductions["total"] > 0:
        sources_used.append("Deductions por edad (heuristica FL)")
    elif deductions.get("_skipped_global"):
        sources_used.append(
            f"Deductions SKIPPED (renovated: {deductions.get('_skip_reason', '?')})"
        )

    # 2. Tax assessed (stub Miami-Dade por ahora)
    tax_assessed_data = fetch_tax_assessed(address, county_name, state)
    tax_assessed_value = None
    if tax_assessed_data:
        tax_assessed_value = tax_assessed_data.get("assessed_value")
        sources_used.append(f"Tax assessed ({county_name})")
    else:
        errors.append(f"Tax assessed no disponible para {county_name or '?'} (scraper pendiente Wave 2 follow-up)")

    # 3. Firecrawl comps (opt-in)
    if include_firecrawl_comps is None:
        do_firecrawl = _firecrawl_enabled()
    else:
        do_firecrawl = include_firecrawl_comps

    comps: list[dict] = []
    if do_firecrawl and zip_code:
        comps, comp_errors = fetch_zillow_comps(zip_code, beds, baths, sqft)
        errors.extend(comp_errors)
        if comps:
            sources_used.append(f"Zillow recently sold ({len(comps)} comps via Firecrawl)")

    # 4. Calcular valor estimado
    estimated_mid_from_comps, avg_ppsqft = estimate_value_from_comps(comps, sqft)
    price_per_sqft_subject = round(listing_price / sqft, 2) if sqft > 0 else 0

    # Combinar tax_assessed + comps + deductions para mid estimate
    candidates_mid = []
    if estimated_mid_from_comps:
        candidates_mid.append(estimated_mid_from_comps)
    if tax_assessed_value:
        # Tax assessed en FL suele ser ~85% del market value
        candidates_mid.append(int(tax_assessed_value / 0.85))

    if candidates_mid:
        estimated_mid = int(sum(candidates_mid) / len(candidates_mid)) - deductions["total"]
    else:
        # Fallback: listing - deductions, con muy low confidence
        estimated_mid = max(0, int(listing_price) - deductions["total"])

    estimated_mid = max(estimated_mid, 1)  # nunca zero

    estimated_low = int(estimated_mid * PRICE_LOW_PCT)
    estimated_high = int(estimated_mid * PRICE_HIGH_PCT)

    # Confidence segun fuentes disponibles
    if comps and tax_assessed_value:
        confidence = "high"
    elif comps or tax_assessed_value:
        confidence = "medium"
    else:
        confidence = "low"

    overpriced_pct = None
    if estimated_mid > 0:
        overpriced_pct = round((listing_price - estimated_mid) / estimated_mid * 100, 1)

    inflation_score = calculate_inflation_score(listing_price, estimated_mid)

    return {
        "listing_price": int(listing_price),
        "tax_assessed_value": tax_assessed_value,
        "comps_used": comps,
        "estimated_value": {
            "low": estimated_low,
            "mid": estimated_mid,
            "high": estimated_high,
            "confidence": confidence,
        },
        "price_per_sqft_comps_avg": avg_ppsqft,
        "price_per_sqft_subject": price_per_sqft_subject,
        "overpriced_pct": overpriced_pct,
        "inflation_score": inflation_score,
        "deductions": deductions,
        "market_trend": {
            "direction": "unknown",
            "evidence": "Para detectar tendencia requiere historial de comps (no implementado en MVP)",
        },
        "sources_used": sources_used,
        "fetch_errors": errors,
        "firecrawl_used": do_firecrawl and bool(comps),
        "fetched_at": fetched_at,
    }