"""price_validator.py - detect listing-price vs. market-estimate discrepancies.

PROBLEM IT SOLVES:
    A deal is listed at $70K while the Zillow Zestimate says $280K, so the cap
    rate comes out at 18%. The system would otherwise evaluate it as a normal
    deal without noticing the most obvious RED FLAG: a price that low almost
    always points to a hidden, inheritable problem (liens, foreclosure,
    damage, code violations, title issues, etc.).

GOAL:
    Detect a discrepancy >30% between the listing price and market value
    estimates, and alert the user loudly (CRITICAL_RED_FLAG) with a list of
    probable reasons and the mandatory due diligence.

LOGIC:
    - discrepancy < 10%  -> NORMAL (listing within a reasonable range)
    - 10-30%             -> WARNING (out of range, but not scandalous)
    - >= 30%             -> CRITICAL_RED_FLAG (investigate before continuing)

MARKET VALUE SOURCES:
    1. Zillow Zestimate (Firecrawl, ~3 credits) - opt-in through
       ENABLE_FIRECRAWL_PRICE_CHECK
    2. Redfin estimate (Firecrawl, ~3 credits) - opt-in
    3. Tax assessed value (county scraper, free once it works)
    4. Existing comps from property_value.py, when already computed

FAIL-SOFT:
    When no source is available the result has status "UNKNOWN" plus a
    warning-style recommendation.
"""
from __future__ import annotations

import os
import re
from datetime import datetime, timezone
from typing import Optional

from .base import FetcherError, USER_AGENT, DEFAULT_TIMEOUT

# Thresholds
NORMAL_THRESHOLD_PCT = 10.0
WARNING_THRESHOLD_PCT = 30.0

# Tax assessed → market value typical ratio in FL
TAX_TO_MARKET_RATIO = 0.85  # tax assessed value is usually ~85% of market value

# Scraped estimates outside this band are treated as parse noise.
_MIN_PLAUSIBLE_ESTIMATE = 20_000
_MAX_PLAUSIBLE_ESTIMATE = 50_000_000

# Listings below this amount get the "distressed deal" hypotheses when no
# market estimate is available. NOTE: the user-facing messages spell out
# "<$150K" literally, so keep both in sync if this ever changes.
_SUSPICIOUS_LISTING_CEILING = 150_000

# A dollar amount: either comma-grouped ("280,000", "12,500,000") or 4-9 bare
# digits. The trailing lookahead refuses to capture a truncated prefix of a
# longer number.
_DOLLAR_AMOUNT = r"\$(\d{1,3}(?:,\d{3})+|\d{4,9})(?!\d)"


def _firecrawl_price_check_enabled() -> bool:
    """Return True only if the price-check flag is "true" and an API key is set.

    This flag is separate from the comps one. It defaults to OFF so that
    Firecrawl credits are not burned by accident.
    """
    flag = os.getenv("ENABLE_FIRECRAWL_PRICE_CHECK", "false").lower() == "true"
    has_key = bool(os.getenv("FIRECRAWL_API_KEY", "").strip())
    return flag and has_key


def _address_slug(address: str) -> str:
    """Turn a free-form address into a dash-separated slug for a URL.

    Punctuation is dropped and any run of whitespace becomes a single dash,
    so "123 Main St,  Tampa" yields "123-Main-St-Tampa" (no doubled, leading
    or trailing dashes).
    """
    return "-".join(re.sub(r"[^\w\s]", "", address).split())


def _extract_dollar_amount(markdown: str, label: str) -> Optional[int]:
    """Return the first dollar amount that follows *label* in *markdown*.

    *label* is a regex fragment matched case-insensitively. Returns None when
    the label is absent or is not followed by a recognizable amount.
    """
    match = re.search(rf"{label}[^\$]*{_DOLLAR_AMOUNT}", markdown, re.IGNORECASE)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


def _is_plausible_estimate(value: int) -> bool:
    """Tell whether a scraped value falls inside the accepted price band."""
    return _MIN_PLAUSIBLE_ESTIMATE <= value <= _MAX_PLAUSIBLE_ESTIMATE


# ═══════════════════════════════════════════════════════════════════════════
# Market value fetchers
# ═══════════════════════════════════════════════════════════════════════════


def fetch_zillow_zestimate(address: str) -> tuple[Optional[int], list[str]]:
    """Fetch the Zillow Zestimate by scraping the property page via Firecrawl.

    Consumes ~3 Firecrawl credits. Never raises for expected failures; every
    problem is reported through the returned error list instead.

    Args:
        address: Free-form property address used to build the Zillow URL.

    Returns:
        ``(zestimate, errors)``. ``zestimate`` is None when the lookup is
        disabled, the SDK or API key is missing, the scrape fails, the
        pattern is not found, or the value is outside the plausible band.
    """
    errors: list[str] = []

    if not _firecrawl_price_check_enabled():
        errors.append(
            "Firecrawl price check deshabilitado. "
            "Setear ENABLE_FIRECRAWL_PRICE_CHECK=true en .env para activar."
        )
        return None, errors

    # Imported lazily so the module still loads without the optional SDK.
    try:
        from firecrawl import FirecrawlApp
    except ImportError as e:
        errors.append(f"firecrawl-py no importable: {e}")
        return None, errors

    api_key = os.getenv("FIRECRAWL_API_KEY", "").strip()
    if not api_key:
        errors.append("FIRECRAWL_API_KEY ausente en .env")
        return None, errors

    # Zillow address search URL.
    # Format: https://www.zillow.com/homes/{address-with-dashes}_rb/
    url = f"https://www.zillow.com/homes/{_address_slug(address)}_rb/"

    # Broad catch on purpose: this is the boundary with a third-party SDK and
    # the function must stay fail-soft; the error is recorded, not swallowed.
    try:
        app = FirecrawlApp(api_key=api_key)
        # Firecrawl SDK v2+: .scrape() (renamed from legacy .scrape_url())
        result = app.scrape(url, formats=["markdown"])
        if not result or not hasattr(result, "markdown"):
            errors.append("Firecrawl Zillow: respuesta vacia")
            return None, errors
        md = result.markdown or ""
    except Exception as e:
        errors.append(f"Firecrawl Zillow error: {e}")
        return None, errors

    # Parser: look for "Zestimate" followed by the nearest price.
    zestimate = _extract_dollar_amount(md, "zestimate")
    if zestimate is None:
        errors.append("Zillow markdown OK pero patron 'Zestimate $XXX' no encontrado")
        return None, errors

    if not _is_plausible_estimate(zestimate):
        errors.append(f"Zestimate fuera de rango razonable: ${zestimate}")
        return None, errors

    return zestimate, errors


def fetch_redfin_estimate(address: str) -> tuple[Optional[int], list[str]]:
    """Fetch the Redfin Estimate via Firecrawl; the Redfin twin of the Zillow fetcher.

    Consumes ~3 Firecrawl credits. Never raises for expected failures; every
    problem is reported through the returned error list instead.

    Args:
        address: Free-form property address used to build the Redfin URL.

    Returns:
        ``(estimate, errors)``. ``estimate`` is None when the lookup is
        disabled, the SDK or API key is missing, the scrape fails, the
        pattern is not found, or the value is outside the plausible band.
    """
    errors: list[str] = []

    if not _firecrawl_price_check_enabled():
        errors.append("Firecrawl price check deshabilitado (ENABLE_FIRECRAWL_PRICE_CHECK=true)")
        return None, errors

    # Imported lazily so the module still loads without the optional SDK.
    try:
        from firecrawl import FirecrawlApp
    except ImportError as e:
        errors.append(f"firecrawl-py no importable: {e}")
        return None, errors

    api_key = os.getenv("FIRECRAWL_API_KEY", "").strip()
    if not api_key:
        errors.append("FIRECRAWL_API_KEY ausente")
        return None, errors

    # Redfin search by address.
    url = f"https://www.redfin.com/?location={_address_slug(address)}"

    # Broad catch on purpose: boundary with a third-party SDK, fail-soft.
    try:
        app = FirecrawlApp(api_key=api_key)
        # Firecrawl SDK v2+: .scrape() (renamed from legacy .scrape_url())
        result = app.scrape(url, formats=["markdown"])
        # Coerce a missing or None markdown to "" so the regex search below
        # cannot fail on a non-string.
        md = (result.markdown if result and hasattr(result, "markdown") else "") or ""
    except Exception as e:
        errors.append(f"Firecrawl Redfin error: {e}")
        return None, errors

    est = _extract_dollar_amount(md, "redfin estimate")
    if est is None:
        errors.append("Redfin: patron 'Redfin Estimate' no encontrado")
        return None, errors

    if not _is_plausible_estimate(est):
        # Report the rejection, mirroring the Zillow fetcher.
        errors.append(f"Redfin estimate fuera de rango razonable: ${est}")
        return None, errors

    return est, errors


# ═══════════════════════════════════════════════════════════════════════════
# Possible reasons for a discrepancy (user education)
# ═══════════════════════════════════════════════════════════════════════════

# Bug 6: distressed-deal hypotheses ordered by likelihood in Florida.
# When the listing is a <$150K SFR and status=UNKNOWN, these are the PRIMARY
# CAUSES the system must surface BEFORE the 12 general reasons.
# Frequency derived from public data: ~60% of <$150K SFR listings in Florida
# are foreclosure-related (auction, REO, pre-foreclosure short sale) or tax
# deed.
DISTRESSED_HYPOTHESIS_REASONS = [
    "🥇 FORECLOSURE AUCTION — listing puede ser el opening bid en la subasta judicial. "
    "Lookup obligatorio: lis pendens en CCIS del condado (clerk online).",
    "🥈 REO (Real Estate Owned) — el banco recupero la propiedad post-foreclosure y la "
    "lista as-is cash-quick-close. Comun en bancos chicos / credit unions.",
    "🥉 TAX DEED — el condado vendio el certificado por tax delinquency severa. "
    "1-year redemption period donde el ex-owner puede recomprar.",
    "Pre-foreclosure short sale — owner intenta vender antes de la subasta. "
    "Requiere aprobacion del lender (puede llevar 3-6 meses).",
    "Wholesale assignment — el wholesaler tiene el deal bajo contrato y vende el contrato. "
    "Puede haber issues con marketable title.",
    "Probate / estate sale — heirs liquidando rapido. Requiere certificado del juez.",
]

POSSIBLE_RED_FLAG_REASONS = [
    "Tax delinquency severa (property tax + interes acumulado puede ser >20% del valor)",
    "IRS lien sobre el owner (federal tax lien, 120-day right of redemption)",
    "Code enforcement violations grandes (municipalidad puede tener liens de $50K+)",
    "Foreclosure en curso (lis pendens publico) — el seller intenta vender antes de subasta",
    "Damage severo no fotografiado (fire, water, structural) que requiere $50K-$200K rehab",
    "Title issues (clouds en el chain — heirs no identificados, divorce sin completar, fraud)",
    "Bankruptcy quick-sale (trustee debe liquidar rapido, precio bajo para cerrar)",
    "Wholesaler problem deal (el wholesaler le bajo el precio porque tuvo issues con buyers anteriores)",
    "HOA litigation pendiente — lender no presta hasta resolver",
    "Open insurance claims que el buyer hereda",
    "Polybutylene plumbing + electrical Federal Pacific (re-pipe + repanel costoso)",
    "Inhabitable / no certificate of occupancy (puede ser ilegal alquilar tal como esta)",
]

MANDATORY_INVESTIGATION_LIST = [
    "Court records search (county clerk: lis pendens, foreclosure docket, civil suits)",
    "Tax collector / appraiser: verificar pagos al dia + assessed value",
    "Code enforcement check con la municipalidad: violations + liens",
    "Property records: chain of title del county recorder",
    "Title search profesional ($300-$500) ANTES de hacer oferta",
    "Drive-by inspection (sin entrar): nivel de mantenimiento exterior, signos de damage",
    "PACER bankruptcy search (federal): auto-stay del owner puede invalidar transferencia",
    "Permits buscador: openings sin cerrar pueden tener implicaciones legales",
    "Open insurance claims: pedir disclosure al seller",
]


# ═══════════════════════════════════════════════════════════════════════════
# Public API
# ═══════════════════════════════════════════════════════════════════════════


def _is_heuristic_only(sources: Optional[list]) -> bool:
    """Tell whether the comps estimate comes solely from deduction heuristics.

    True when the joined source labels mention a deduction/heuristic and none
    of them mentions a real data source (comp, tax, zillow, redfin).
    """
    if not sources:
        return False
    joined = " | ".join(str(s) for s in sources).lower()
    mentions_heuristic = any(
        word in joined for word in ("deduction", "heurística", "heuristica")
    )
    mentions_real_data = any(
        word in joined for word in ("comp", "tax", "zillow", "redfin")
    )
    return mentions_heuristic and not mentions_real_data


def _low_listing_recommendation(listing_price: float, klass: str) -> str:
    """Build the recommendation for a suspiciously low listing with no estimates.

    *klass* is the upper-cased neighborhood class ("" when unknown).
    """
    if klass in ("C", "D"):
        class_note = (
            f"Vecindario Class {klass} (income bajo) — listing en este rango "
            "puede ser market-rate. Pero foreclosure tampoco esta descartado: en Class D "
            "FL, el porcentaje de foreclosures es ~3x el promedio nacional."
        )
    elif klass in ("A", "B"):
        class_note = (
            f"Vecindario Class {klass} (income medio/alto) — listing tan bajo "
            "es PROBABLEMENTE deal distressed. Investigar lis pendens en CCIS antes de proceder."
        )
    else:  # neighborhood class unknown
        class_note = (
            "Neighborhood class no disponible — no se puede inferir si el listing es "
            "market-rate-para-la-zona o distressed."
        )
    return (
        f"⚠️ Listing ${listing_price:,.0f} es estadisticamente raro para SFR en Florida "
        f"(<$150K). {class_note} "
        "HIPOTESIS PRIMARIA: deal distressed (foreclosure, tax_deed, REO, short sale, "
        "pre-foreclosure). Re-verificar deal_type del usuario, hacer court records lookup "
        "(lis pendens en CCIS del condado), y tratar este analisis como PRELIMINAR hasta "
        "confirmar el status real."
    )


def validate_price(
    *,
    address: str,
    listing_price: float,
    tax_assessed_value: Optional[float] = None,
    existing_comps_estimate: Optional[float] = None,
    existing_comps_confidence: Optional[str] = None,  # Bug 4: "high"|"medium"|"low"|None
    existing_comps_sources: Optional[list] = None,  # Bug 4: list of source labels
    neighborhood_class: Optional[str] = None,  # Bug 6: A|B|C|D|None
    use_firecrawl: Optional[bool] = None,
) -> dict:
    """Validate ``listing_price`` against the available market value sources.

    Args:
        address: Full property address.
        listing_price: Listed price.
        tax_assessed_value: Optional tax assessed value; converted to an
            implied market value by dividing by ``TAX_TO_MARKET_RATIO``.
            Values of 1000 or less are ignored.
        existing_comps_estimate: Optional mid estimated value from
            property_value.py. Values of 1000 or less are ignored.
        existing_comps_confidence: Confidence of that estimate
            ("high"/"medium"/"low"). When "low", the estimate is NOT used as
            a baseline (Bug 4 fix).
        existing_comps_sources: Source labels behind the estimate. When they
            only mention deduction heuristics, the estimate is NOT used as a
            baseline (Bug 4).
        neighborhood_class: A/B/C/D (case-insensitive). Only used to word the
            recommendation when no estimate is available and the listing is
            below $150K (Bug 6).
        use_firecrawl: True runs the Zillow + Redfin lookups (consumes
            credits); None follows ENABLE_FIRECRAWL_PRICE_CHECK. Note that
            the fetchers re-check that flag and the API key themselves, so
            True without them only adds "disabled" errors.

    Returns:
        A dict that always contains: ``status`` (NORMAL | WARNING |
        CRITICAL_RED_FLAG | UNKNOWN), ``listing_price``,
        ``market_estimates``, ``max_discrepancy_pct``,
        ``min_discrepancy_pct``, ``possible_reasons``,
        ``mandatory_investigation``, ``recommendation``, ``sources_used``,
        ``rejected_sources``, ``errors`` and ``fetched_at``.
        UNKNOWN results additionally carry ``suspicious_low_listing``; all
        other results additionally carry ``discrepancies_pct``,
        ``signed_max_discrepancy_pct`` and ``direction``.
        Discrepancies are negative when the listing is below the estimate.
    """
    fetched_at = datetime.now(timezone.utc).isoformat()
    errors: list[str] = []
    sources_used: list[str] = []
    estimates: dict[str, Optional[int]] = {
        "zillow_zestimate": None,
        "redfin_estimate": None,
        "tax_implied_market": None,
        "comps_mid": None,
    }
    rejected_sources: list[str] = []  # Bug 4: tracks discarded sources

    # 1. Tax assessed → implied market value (FL ratio ~85%)
    if tax_assessed_value and tax_assessed_value > 1000:
        estimates["tax_implied_market"] = int(tax_assessed_value / TAX_TO_MARKET_RATIO)
        sources_used.append(
            f"Tax assessed → market implied (${tax_assessed_value:,.0f} / {TAX_TO_MARKET_RATIO})"
        )

    # 2. Existing comps estimate (from property_value.py) — WITH QUALITY CHECK (Bug 4)
    if existing_comps_estimate and existing_comps_estimate > 1000:
        # Reject a "low" confidence estimate: it means property_value.py had
        # no real data and fell back to the deductions heuristic. Using it as
        # a baseline yields an INVERTED direction (seen in the Jacksonville
        # test). An estimate whose ONLY source is deductions is likewise not
        # a real comp.
        if existing_comps_confidence == "low" or _is_heuristic_only(existing_comps_sources):
            rejected_sources.append(
                f"property_value comps_mid descartado: confidence={existing_comps_confidence}, "
                f"sources={existing_comps_sources} — fallback heuristico no es baseline valido"
            )
            errors.append(
                "property_value estimate descartado por baja calidad (heuristic-only). "
                "Para validacion confiable: activar ENABLE_FIRECRAWL_COMPS o esperar tax_assessed scraper."
            )
        else:
            estimates["comps_mid"] = int(existing_comps_estimate)
            sources_used.append(
                f"Comps mid (confidence={existing_comps_confidence or 'unknown'}, "
                f"${existing_comps_estimate:,.0f})"
            )

    # 3. Firecrawl lookups (Zillow Zestimate + Redfin Estimate)
    if use_firecrawl is None:
        do_firecrawl = _firecrawl_price_check_enabled()
    else:
        do_firecrawl = use_firecrawl

    if do_firecrawl:
        z, z_errors = fetch_zillow_zestimate(address)
        if z:
            estimates["zillow_zestimate"] = z
            sources_used.append(f"Zillow Zestimate (${z:,.0f})")
        errors.extend(z_errors)

        r, r_errors = fetch_redfin_estimate(address)
        if r:
            estimates["redfin_estimate"] = r
            sources_used.append(f"Redfin Estimate (${r:,.0f})")
        errors.extend(r_errors)

    # 4. Compute the discrepancy (negative = listing < market, positive =
    #    listing > market) against every available estimate.
    discrepancies = {
        src: round((listing_price - val) / val * 100, 1)
        for src, val in estimates.items()
        if val
    }

    if not discrepancies:
        # Bug 6: UNKNOWN but the listing is suspiciously low → surface the
        # distressed hypotheses (foreclosure / tax deed / REO / short sale).
        suspicious_low_listing = listing_price < _SUSPICIOUS_LISTING_CEILING
        possible: list[str] = []
        investigation: list[str] = []
        recommendation_text = (
            "No se pudo validar el precio contra fuentes de mercado confiables. "
            "Activar ENABLE_FIRECRAWL_PRICE_CHECK + ENABLE_FIRECRAWL_COMPS en .env "
            "o esperar el tax_assessed scraper para validacion automatica. "
            "Considera lookup manual en Zillow/Redfin antes de proceder."
        )

        if suspicious_low_listing:
            # Put the distressed hypotheses first, ahead of the general
            # reasons. Fresh lists are built so callers can never mutate the
            # module-level constants through the result.
            possible = DISTRESSED_HYPOTHESIS_REASONS + POSSIBLE_RED_FLAG_REASONS
            investigation = list(MANDATORY_INVESTIGATION_LIST)
            # Normalize once so "c" and "C" (or "a" and "A") behave alike.
            klass = (neighborhood_class or "").strip().upper()
            recommendation_text = _low_listing_recommendation(listing_price, klass)

        return {
            "status": "UNKNOWN",
            "listing_price": int(listing_price),
            "market_estimates": estimates,
            "max_discrepancy_pct": None,
            "min_discrepancy_pct": None,
            "possible_reasons": possible,
            "mandatory_investigation": investigation,
            "recommendation": recommendation_text,
            "sources_used": sources_used,
            "rejected_sources": rejected_sources,
            "suspicious_low_listing": suspicious_low_listing,
            "errors": errors,
            "fetched_at": fetched_at,
        }

    # The largest absolute discrepancy is the most alarming one (whether above
    # or below market); keep its signed form to report the direction.
    signed_max = max(discrepancies.values(), key=abs)
    max_abs_disc = abs(signed_max)
    min_abs_disc = min(abs(d) for d in discrepancies.values())

    possible_reasons: list[str] = []
    investigation = []

    if max_abs_disc < NORMAL_THRESHOLD_PCT:
        status = "NORMAL"
        recommendation = (
            f"Listing dentro de ±{NORMAL_THRESHOLD_PCT}% de market estimates. "
            "Procede con analisis financiero estandar."
        )
    elif max_abs_disc < WARNING_THRESHOLD_PCT:
        status = "WARNING"
        direction = "sobre" if signed_max > 0 else "bajo"
        recommendation = (
            f"Listing {abs(signed_max):.0f}% {direction} el market estimate. "
            "Verifica condiciones del deal antes de proceder. "
            "Si listing > market: probable inflación del seller. "
            "Si listing < market: investigar razon (motivacion legitima vs problema oculto)."
        )
    else:
        status = "CRITICAL_RED_FLAG"
        if signed_max < 0:
            # Listing < market — the dangerous hidden-problem case.
            recommendation = (
                f"🚨 LISTING ${listing_price:,.0f} esta {abs(signed_max):.0f}% BAJO el market estimate. "
                "Esto NO es un 'gran deal' por default — es una RED FLAG masiva. "
                "El precio bajo casi siempre indica problema oculto heredable. "
                "NO procedas con analisis financiero estandar hasta entender el POR QUE del precio bajo. "
                "Cap rate alto en este contexto puede ser ilusion — los costos heredables pueden destruir el deal."
            )
            # Copies, so callers cannot mutate the module-level constants.
            possible_reasons = list(POSSIBLE_RED_FLAG_REASONS)
            investigation = list(MANDATORY_INVESTIGATION_LIST)
        else:
            # Listing > market — classic inflated seller, but not dangerous.
            recommendation = (
                f"Listing ${listing_price:,.0f} esta {abs(signed_max):.0f}% SOBRE el market estimate. "
                "Probable inflacion del seller. Oferta agresiva justificada. "
                "Si declinan, walk away — hay deals mejores."
            )

    if signed_max < 0:
        direction_label = "listing_BELOW_market"
    elif signed_max > 0:
        direction_label = "listing_ABOVE_market"
    else:
        direction_label = "match"

    return {
        "status": status,
        "listing_price": int(listing_price),
        "market_estimates": estimates,
        "discrepancies_pct": discrepancies,
        "max_discrepancy_pct": round(max_abs_disc, 1),
        "min_discrepancy_pct": round(min_abs_disc, 1),
        "signed_max_discrepancy_pct": round(signed_max, 1),
        "direction": direction_label,
        "possible_reasons": possible_reasons,
        "mandatory_investigation": investigation,
        "recommendation": recommendation,
        "sources_used": sources_used,
        # Previously computed but dropped on this path.
        "rejected_sources": rejected_sources,
        "errors": errors,
        "fetched_at": fetched_at,
    }