Files
AR-House/orchestrator.py
T
2026-07-03 12:24:58 -04:00

1213 lines
51 KiB
Python

"""
Orquestador de AR-House.
Ejecuta los 5 agentes de analisis EN SECUENCIA (low_power mode):
cada modelo se carga, responde, y se descarga (keep_alive=0) antes de
cargar el siguiente. Esto mantiene el pico de VRAM en 1 modelo a la vez.
EmailComposer se invoca aparte (on-demand) via compose_email().
"""
from __future__ import annotations
import json
import os
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Callable, Optional
import ollama
from config import RESOURCE_MODE, KEEP_ALIVE_BY_MODE, ANALYSES_DIR
from data_fetchers.runner import fetch_all
from data_fetchers.price_validator import validate_price # Bug 2: detect listing vs market discrepancy
from finance_calculator import compute_all_scenarios, build_calculated_block
from prompt_templates import (
build_photo_prompt, # legacy, no usado en analyze_deal
build_photo_prompt_single,
build_photo_consolidation_prompt,
build_deal_prompt,
build_research_prompt,
build_lender_prompt,
build_synthesis_prompt,
build_value_prompt, # Wave 2
build_offer_prompt, # Wave 2
build_glossary_prompt,
build_email_prompt,
)
from data_fetchers.property_value import fetch_property_value # Wave 2
StatusCallback = Optional[Callable[[str], None]]
# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------
@dataclass
class DealInputs:
    """Raw facts about the property under analysis, as entered by the user."""

    address: str
    price: float
    rent: float
    property_tax: float
    insurance: float
    hoa: float
    sqft: int
    beds: int
    baths: float
    year_built: int
    arv: float
    rehab_override: Optional[float] = None
    # NEW (Fix #2/#3): deal_type controls whether SCENARIO 5 AUCTION is evaluated.
    # Values: "mls" (default) | "off_market" | "auction" | "foreclosure" | "tax_deed" | "reo"
    deal_type: str = "mls"
    # NEW (auction deals): case_number = court case number / auction ID.
    # Lets the user do the lookup at the county clerk. Only applies when deal_type != "mls".
    case_number: Optional[str] = None
    # NEW: occupancy status (affects eviction reserve + timeline).
    # Values: "vacant" | "occupied" | "unknown" | None (not inspected)
    occupancy_status: Optional[str] = None
    # NEW (Property Type bugfix): property type. Defaults to SFR, but LAND/COMMERCIAL/
    # etc. take a different pipeline (SFR-rental DSCR/Cap Rate do not apply).
    # Values: "sfr" (default) | "condo" | "townhome" | "multi_family" | "land" |
    # "mobile_home" | "commercial"
    property_type: str = "sfr"
    # Optional listing description (used by detect_property_type_anomalies to
    # look for keywords such as "vacant lot", "buildable", "land for sale", etc.)
    listing_description: Optional[str] = None
@dataclass
class BuyerProfile:
    """Buyer/investor profile handed to the lender-matching and briefing prompts."""

    profile_class: str  # "A" / "B" / "C" / "D"
    fico: Optional[int] = None
    llc_seasoning_years: Optional[float] = None
    capital_available: Optional[float] = None
    experience_deals: Optional[int] = None
    nationality: Optional[str] = None
    visa_status: Optional[str] = None
@dataclass
class AgentResult:
    """Outcome of a single agent call (or of several calls folded into one)."""

    agent: str  # agent/model name the call was made against
    output: str  # text returned by the agent; "" when the call failed
    seconds: float  # wall-clock time spent in the call(s)
    tokens: int = 0  # taken from the response's "eval_count" by _run_agent
    error: Optional[str] = None  # str(exception) on failure, otherwise None
@dataclass
class AnalysisResult:
    """Full record of one analysis run; serialized to JSON via asdict()."""

    deal: dict
    profile: dict
    photo: dict
    deal_analysis: dict
    research: dict
    lender: dict
    final: dict
    started_at: str
    finished_at: str
    total_seconds: float
    verified_data: dict = field(default_factory=dict)  # snapshot of Wave 1+1.5B fetchers
    executive_briefing: dict = field(default_factory=dict)  # ContextualGlossaryAgent output
    computed_scenarios: dict = field(default_factory=dict)  # finance_calculator.py output (Fix #3)
    # Wave 2
    property_value_data: dict = field(default_factory=dict)  # fetch_property_value() output
    value_estimate: dict = field(default_factory=dict)  # ValueEstimator agent output
    offer_strategy: dict = field(default_factory=dict)  # OfferStrategist agent output
    # Bug 2: Price Discrepancy Detection (red flag raised BEFORE the analysis)
    price_validation: dict = field(default_factory=dict)  # validate_price() output
    # NEW: Property type detection (LAND bugfix)
    # When detection suggests LAND/COMMERCIAL/etc. and the user said "sfr",
    # this holds warnings + suggested_type, and the pipeline BYPASSES the
    # SFR agents (DealAnalyzer/LenderMatcher/ValueEstimator/OfferStrategist).
    property_type_warning: dict = field(default_factory=dict)
    pipeline_mode: str = "full_sfr"  # "full_sfr" | "land_simplified" | future modes
# ---------------------------------------------------------------------------
# Helpers internos
# ---------------------------------------------------------------------------
def _emit(cb: StatusCallback, msg: str) -> None:
    """Forward a status message to the callback; do nothing when there is none."""
    if cb is None:
        return
    cb(msg)
def _extract_rehab_estimate_from_output(photo_output: str) -> Optional[float]:
    """Best-effort: pull the midpoint rehab estimate out of a PhotoInspector report.

    Scans the text for numeric ranges such as "$25K-$32K", "$25,000 - $32,000"
    or "25K a 32K" and returns the midpoint of the first plausible one.
    A range is plausible when its midpoint lies in [1,000, 200,000]; anything
    larger is most likely a price/ARV rather than a rehab budget.

    Ranges that carry a money marker ("$" or a "K" suffix) win over bare
    number ranges, so a year span like "1990-2005" does not shadow a real
    "$25K-$32K" later in the text. If only bare ranges are plausible, the
    first of them is returned.

    Args:
        photo_output: Free-form report text; may be empty or None.

    Returns:
        The midpoint in USD, or None when no plausible range is found.
        Single amounts (not ranges) are not extracted.
    """
    if not photo_output:
        return None
    import re

    # One amount: digits with optional thousands commas / decimals, then an
    # optional K suffix captured on its own so the multiplier is decided by
    # the number itself, not by stray "k" letters elsewhere in the match.
    amount = r"(\d[\d,]*(?:\.\d+)?)(K)?"
    range_re = re.compile(
        rf"(\$)?\s*{amount}\s*[-–a]\s*(\$)?\s*{amount}",
        re.IGNORECASE,
    )

    bare_candidate: Optional[float] = None
    for match in range_re.finditer(photo_output):
        dollar_low, low_txt, k_low, dollar_high, high_txt, k_high = match.groups()
        # "$25-32K" shorthand: a K on either side scales both ends.
        scale = 1000 if (k_low or k_high) else 1
        low = float(low_txt.replace(",", "")) * scale
        high = float(high_txt.replace(",", "")) * scale
        midpoint = (low + high) / 2
        # Sanity window: outside it the regex probably hit beds/baths or price/ARV.
        if not 1000 <= midpoint <= 200_000:
            continue
        if dollar_low or dollar_high or k_low or k_high:
            return midpoint
        if bare_candidate is None:
            bare_candidate = midpoint
    return bare_candidate
def _no_photo_message(deal: DealInputs) -> str:
    """Build the placeholder PhotoInspector text used when no photos were analyzed.

    Mentions the user's rehab override when one was given; otherwise states
    the default rehab assumption.
    """
    override = deal.rehab_override
    if override is None:
        return (
            "No se subieron fotos ni rehab override. DealAnalyzer asumira "
            "rehab default de $25,000 USD (LIGHT) salvo que indique otra cosa."
        )
    return (
        f"No se subieron fotos. Usando rehab override del usuario: "
        f"${override:,.0f} USD. Categoria asumida segun monto."
    )
# ---------------------------------------------------------------------------
# Property Type Detection (LAND bugfix)
# ---------------------------------------------------------------------------
# Keywords in listing_description that suggest LAND.
# Matched as lowercase substrings by detect_property_type_anomalies.
_LAND_KEYWORDS = (
    "vacant lot", "vacant land", "buildable", "land for sale", "raw land",
    "tear down", "teardown", "build your", "ready to build", "vacant parcel",
    "raw lot", "empty lot", "undeveloped", "wooded lot", "cleared lot",
    "lote vacante", "terreno", "terreno baldio",
)
# Keywords that suggest COMMERCIAL
_COMMERCIAL_KEYWORDS = (
    "commercial", "office", "warehouse", "industrial", "retail space",
    "strip mall", "storefront", "mixed use", "investment property nnn",
)
# Keywords that suggest MOBILE HOME
_MOBILE_HOME_KEYWORDS = (
    "mobile home", "manufactured home", "trailer home", "double wide", "single wide",
)
def _analyze_land_simplified(
    *,
    deal: DealInputs,
    profile: BuyerProfile,
    property_type_warning: dict,
    started_at: datetime,
    t0: float,
    status_cb: StatusCallback,
) -> "AnalysisResult":
    """
    Reduced LAND pipeline used instead of the SFR agents.

    Skips DealAnalyzer / LenderMatcher / ValueEstimator / OfferStrategist and
    runs, in order:
      1. fetch_all (falls back to an empty verified_data dict on failure)
      2. FloridaResearcher with the LAND research prompt
      3. Coordinator with the LAND coordinator prompt
      4. ContextualGlossaryAgent with the LAND briefing prompt

    finance_calculator is not invoked; computed_scenarios is stored empty and
    the skipped agents are recorded with a "SKIPPED" placeholder.

    The result is written as JSON to ANALYSES_DIR
    ("<timestamp>_<sanitized address>_LAND.json") and returned with
    pipeline_mode="land_simplified".
    """

    def _skipped(agent_name: str, reason: str) -> dict:
        # Placeholder with the same keys as asdict(AgentResult).
        return {
            "output": f"SKIPPED — property_type=land ({reason})",
            "seconds": 0,
            "agent": agent_name,
            "tokens": 0,
            "error": None,
        }

    detected_type = property_type_warning.get("suggested_type", "land") or "land"
    _emit(status_cb, f"[Land simplified] Pipeline reducido para property_type={detected_type}")

    # Step 1 — Wave 1 fetchers are still useful for land (zoning, flood, neighborhood).
    _emit(status_cb, "Obteniendo datos verificados (FEMA flood / Census neighborhood) — utiles para LAND...")
    try:
        verified_data = fetch_all(deal, status_cb=status_cb)
    except Exception as exc:
        _emit(status_cb, f" fetch_all fallo: {exc}. Continuamos con verified_data vacio.")
        verified_data = {
            "geocode": {},
            "flood": {},
            "fmr": {},
            "hurricanes": [],
            "fetch_errors": [str(exc)],
        }

    photo_msg = _no_photo_message(deal)
    photo = AgentResult(agent="PhotoInspector", output=photo_msg, seconds=0)

    # Step 2 — FloridaResearcher (LAND prompt).
    research_prompt = _build_land_research_prompt(deal, verified_data, property_type_warning)
    _emit(status_cb, "Corriendo FloridaResearcher (LAND mode)...")
    research = _run_agent("FloridaResearcher", research_prompt, status_cb=status_cb)

    # Step 3 — Coordinator (LAND prompt).
    coord_prompt = _build_land_coordinator_prompt(
        deal, photo_msg, research.output, verified_data, property_type_warning
    )
    _emit(status_cb, "Corriendo Coordinator (LAND mode)...")
    final = _run_agent("Coordinator", coord_prompt, status_cb=status_cb)

    # Step 4 — ContextualGlossaryAgent (LAND prompt).
    briefing_prompt = _build_land_glossary_prompt(
        deal, profile, final.output, research.output, verified_data, property_type_warning
    )
    _emit(status_cb, "Corriendo ContextualGlossaryAgent (LAND mode)...")
    briefing = _run_agent("ContextualGlossaryAgent", briefing_prompt, status_cb=status_cb)

    finished_at = datetime.now()
    total = time.perf_counter() - t0

    result = AnalysisResult(
        deal=asdict(deal),
        profile=asdict(profile),
        photo=asdict(photo),
        deal_analysis=_skipped("DealAnalyzer", "no aplica DSCR/Cap Rate"),
        research=asdict(research),
        lender=_skipped(
            "LenderMatcher",
            "loans diferentes: land loans/construction loans, no SFR mortgage",
        ),
        final=asdict(final),
        started_at=started_at.isoformat(),
        finished_at=finished_at.isoformat(),
        total_seconds=total,
        verified_data=verified_data,
        executive_briefing=asdict(briefing),
        computed_scenarios={},  # not computed for land
        property_value_data={},
        value_estimate=_skipped(
            "ValueEstimator",
            "comparables SFR no aplican; necesita land comps",
        ),
        offer_strategy=_skipped(
            "OfferStrategist",
            "Strike/MAB formulas no aplican; necesita Land Offer Strategist",
        ),
        price_validation={
            "status": "SKIPPED",
            "reason": "property_type=land — comp benchmarks no aplican",
        },
        property_type_warning=property_type_warning,
        pipeline_mode="land_simplified",
    )

    # Persist the run next to the other analyses.
    out_dir = Path(ANALYSES_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = started_at.strftime("%Y%m%d_%H%M%S")
    # Non-alphanumeric characters become "_"; only the first 50 chars are kept.
    safe_addr = "".join([ch if ch.isalnum() else "_" for ch in deal.address[:50]])
    out_file = out_dir / f"{stamp}_{safe_addr}_LAND.json"
    payload = json.dumps(asdict(result), ensure_ascii=False, indent=2)
    out_file.write_text(payload, encoding="utf-8")
    _emit(status_cb, f"Analisis LAND guardado: `{out_file.name}` ({total:.0f}s total)")
    return result
# ---------------------------------------------------------------------------
# LAND-mode prompt builders (simplified — los SFR builders no aplican)
# ---------------------------------------------------------------------------
def _build_land_research_prompt(deal: DealInputs, verified_data: dict, warning: dict) -> str:
    """Build the FloridaResearcher prompt for the LAND-simplified pipeline.

    Args:
        deal: Deal inputs; address, price, year_built, sqft, beds, baths and
            listing_description are interpolated into the prompt.
        verified_data: Fetcher snapshot; only flood.zone and
            neighborhood.neighborhood_class are read ("?" when missing).
        warning: Output of detect_property_type_anomalies. At most the first
            six warnings are listed.

    Returns:
        The prompt text (Spanish).
    """
    flood = (verified_data.get("flood") or {}).get("zone", "?")
    nbh = (verified_data.get("neighborhood") or {}).get("neighborhood_class", "?")
    # suggested_type is present-but-None when the detector had no clear winner
    # (e.g. the user declared the type explicitly); fall back to "land" so the
    # prompt never renders the literal 'None'.
    sugg = warning.get("suggested_type") or "land"
    warnings_text = "\n".join(f" - {w}" for w in (warning.get("warnings") or [])[:6])
    return f"""Investiga este TERRENO/LOTE en Florida (NO es SFR — pipeline simplificado).
DEAL:
- Address: {deal.address}
- Precio listado: ${deal.price:,.0f}
- year_built: {deal.year_built} (0 = sin construccion existente)
- sqft habitable: {deal.sqft} (0 = sin estructura)
- beds/baths: {deal.beds}/{deal.baths}
- Descripcion: {deal.listing_description or '(no provista)'}
DETECCION AUTOMATICA:
{warnings_text}
Sistema sugiere property_type = '{sugg}' (confianza: {warning.get('confidence')})
CONTEXTO VERIFICADO:
- FEMA flood zone: {flood}
- Neighborhood class: {nbh}
═══ TU TAREA (LAND-mode) ═══
Esta propiedad NO es un SFR rentable. Genera analisis de TERRENO/LOTE en 4 secciones:
1. **Perfil del lote y zona** (county, city, neighborhood class si es zona urbana o rural,
uso permitido segun zoning probable, density allowed).
2. **Costos esperados para construir** (Florida 2026 typical):
- Site preparation: $5K-$30K segun terreno (clearing, fill, grading)
- Permits + impact fees: $10K-$40K segun condado (Miami-Dade tiene impact fees altos)
- Construction $/sqft FL 2026: $180-$280 (vinilo) $300-$450 (block construction)
- Utilities hookup si no existen: $10K-$50K (agua, septic, electric, gas)
- TOTAL ballpark para casa 1500 sqft: $250K-$500K all-in
3. **Riesgos LAND-specific en Florida**:
- FEMA flood zone (si AE/VE, construir es muy caro o imposible)
- Wetlands designation (puede impedir desarrollo)
- Endangered species (gopher tortoise, scrub jay)
- Septic vs sewer connection
- Setbacks + minimum lot size requirements
- HOA/CDD covenants (si aplica)
- Possible tax deed redemption issues si proviene de auction
4. **Strategies viables para LAND**:
- Build-to-rent (construir SFR para alquilar)
- Build-to-flip (construir y vender)
- Buy-and-hold land bank (esperar appreciation)
- Wholesale a builder
- Subdivide en multiple lots (si zoning permite)
NO calcules Cap Rate, DSCR, CoC, ni cash flow — esto es LAND, no genera renta sin construir antes.
NO inventes comparables de SFR.
Output 100% en espanol. Minimo 600 palabras de analisis sustantivo.
"""
def _build_land_coordinator_prompt(deal: DealInputs, photo_msg: str, research: str,
                                   verified_data: dict, warning: dict) -> str:
    """Build the Coordinator prompt for the LAND-simplified pipeline.

    Args:
        deal: Deal inputs; address, price, year_built, sqft, beds and baths
            are interpolated.
        photo_msg: Accepted for signature parity; not used in the prompt.
        research: FloridaResearcher output, embedded verbatim.
        verified_data: Accepted for signature parity; not used in the prompt.
        warning: Output of detect_property_type_anomalies (declared_type,
            suggested_type, warnings).

    Returns:
        The prompt text (Spanish).
    """
    # Both keys can be present with a None value; fall back so the prompt
    # never shows the literal 'None' as a property type.
    sugg = warning.get("suggested_type") or "land"
    declared = warning.get("declared_type") or "sfr"
    warnings_text = "\n".join(" - " + w for w in (warning.get("warnings") or []))
    return f"""Sintetiza el analisis para esta propiedad de TIPO TERRENO.
CRITICAL CONTEXT:
- El usuario ingreso esta propiedad como '{declared}', pero el sistema
detecto fuertemente que es '{sugg}'. Pipeline SFR fue BYPASSADO.
- NO hay DealAnalyzer / LenderMatcher / ValueEstimator / OfferStrategist outputs porque
esos agentes asumen SFR rentable y darian numeros invalidos.
- Solo tenemos FloridaResearcher output (LAND-mode).
DEAL:
- Address: {deal.address}
- Precio: ${deal.price:,.0f}
- year_built: {deal.year_built}, sqft: {deal.sqft}, beds/baths: {deal.beds}/{deal.baths}
WARNINGS QUE DISPARARON BYPASS:
{warnings_text}
FloridaResearcher OUTPUT:
---
{research}
---
═══ TU TAREA ═══
Genera el veredicto final con el siguiente formato. **NO menciones DSCR/Cap Rate/CoC/Cash Flow**
porque esos no aplican a land. NO inventes que se hizo un analisis financiero.
1. **ALERTA: PROPIEDAD TIPO TERRENO DETECTADA**
Una linea explicando que el sistema detecto LAND (no SFR) y por que el analisis SFR
no aplica. Citar 2-3 indicadores especificos.
2. **Veredicto LAND**: PASA / PASA CON CONDICIONES / NO PASA segun analisis del FloridaResearcher.
Justificar con zoning, riesgos, viabilidad de construccion.
3. **Mejor estrategia LAND**: build-to-rent / build-to-flip / land bank / wholesale.
4. **Top 3 riesgos LAND-specific**: flood zone, wetlands, costos de construccion, etc.
5. **Costos all-in estimados**: precio land + site prep + permits + construction + utilities.
6. **Proximos pasos**:
- Verificar zoning con el municipality (zoning map online)
- Geotechnical survey ($1K-$3K) para foundation feasibility
- Title search profesional
- Survey y boundary check
- Si es tax deed: 1-year redemption period awareness
- Si el usuario realmente queria SFR: re-buscar deals que SI sean SFR habitables
Output 100% espanol. NO formato SFR. NO inventes financial metrics.
"""
def _build_land_glossary_prompt(deal: DealInputs, profile: BuyerProfile, coord_output: str,
                                research_output: str, verified_data: dict, warning: dict) -> str:
    """Build the ContextualGlossaryAgent (executive briefing) prompt for LAND mode.

    Args:
        deal: Deal inputs; address, price, year_built, sqft, beds and baths
            are interpolated.
        profile: Buyer profile; profile_class, fico and capital_available are
            interpolated.
        coord_output: Coordinator output, embedded verbatim.
        research_output: Accepted for signature parity; not used in the prompt.
        verified_data: Accepted for signature parity; not used in the prompt.
        warning: Output of detect_property_type_anomalies. At most the first
            six warnings are listed.

    Returns:
        The prompt text (Spanish).
    """
    # suggested_type is present-but-None when the detector found no winner
    # (e.g. the type was declared explicitly). Without the fallback,
    # .upper() below raises AttributeError after two agents have already run.
    sugg = warning.get("suggested_type") or "land"
    declared = warning.get("declared_type") or "sfr"
    warnings_text = "\n".join(f" - {w}" for w in (warning.get("warnings") or [])[:6])
    return f"""Genera el briefing ejecutivo para esta propiedad de TIPO TERRENO.
CRITICAL: el usuario es un inversionista que ingreso este deal como '{declared}' pero el sistema
detecto que es '{sugg}'. El briefing DEBE arrancar con una alerta destacada y NO presentar
metricas SFR (Cap Rate, DSCR, CoC) porque NO se calcularon.
DEAL: {deal.address} | Precio: ${deal.price:,.0f}
year_built: {deal.year_built}, sqft: {deal.sqft}, beds: {deal.beds}, baths: {deal.baths}
PERFIL COMPRADOR: Clase {profile.profile_class}, FICO {profile.fico}, capital ${profile.capital_available}
WARNINGS DETECCION:
{warnings_text}
COORDINATOR OUTPUT:
---
{coord_output}
---
═══ TU TAREA — BRIEFING EJECUTIVO LAND-MODE ═══
EL BRIEFING DEBE EMPEZAR con esta seccion H1 obligatoria:
# ALERTA CRITICA: PROPIEDAD TIPO TERRENO DETECTADA
Explica en 4-6 lineas:
1. Que el usuario declaro este deal como '{declared.upper()}' pero los inputs sugieren fuertemente '{sugg.upper()}'.
2. Que las metricas SFR (Cap Rate, DSCR, Cash Flow, CoC) NO se calcularon porque NO aplican a land
(la propiedad no genera renta sin construir antes).
3. Que el pipeline normal de analisis (DealAnalyzer + LenderMatcher + ValueEstimator + OfferStrategist)
fue BYPASSADO porque esos agentes asumen SFR rentable.
4. Que el inversionista debe DECIDIR:
- (a) Cambiar property_type a 'land' y aceptar el analisis LAND-mode actual
- (b) Si realmente queria SFR, re-buscar otro deal (este NO es SFR habitable)
5. Si el deal sigue siendo interesante como LAND: pasos requeridos diferentes a SFR.
DESPUES de esa alerta, sigue con el briefing LAND-mode formato:
## Resumen del deal (LAND)
[lo que ES esta propiedad]
## Analisis LAND (del FloridaResearcher)
[zoning, costos de construccion FL, riesgos LAND-specific]
## Strategies viables
[build-to-rent, build-to-flip, land bank, wholesale, subdivide]
## Riesgos LAND-specific
[flood zone, wetlands, utilities access, septic vs sewer, setbacks]
## Costos all-in estimados
[precio land + permits + utilities + construction = total all-in]
## Proximos pasos
[zoning verify, geotechnical, title search, survey, etc]
## Por que NO calculamos numbers SFR
[explicar que Cap Rate / DSCR / Cash Flow asumen renta de propiedad existente — land no tiene renta hasta construir]
Output 100% en espanol latinoamericano. NO inventes Cap Rate / DSCR / etc.
NO digas "analisis SFR completo" — fue intencionalmente bypassado.
NO recomendates Section 8 ni BRRRR (no aplican a raw land).
Tono: senior advisor explicando al inversionista por que esto NO es lo que el creia.
"""
def detect_property_type_anomalies(deal: DealInputs) -> dict:
    """Detect signs that the property is NOT an SFR even though it may be declared as one.

    Scores three alternative types (land, commercial, mobile home) from
    missing/zero attributes, an unusually low price, and keywords in the
    listing description. A missing (None) attribute is treated the same as 0.
    A None price skips the price check.

    Returns a dict with:
        warnings: list[str] — the indicators that fired
        suggested_type: str | None — "land" / "commercial" / "mobile_home",
            or None when no score strictly beats the other two
        confidence: "high" (top score >= 4) | "medium" (>= 2) | "low"
        is_mismatch: bool — True when suggested_type is set and differs from
            the declared property_type
        declared_type: the deal's property_type
        score_land / score_commercial / score_mobile: raw scores
        recommendation: str — suggested action
        should_bypass_sfr_pipeline: bool — is_mismatch and confidence == "high"
    """

    def _is_blank(value) -> bool:
        # Missing and zero are both "no data" for detection purposes.
        return value is None or value == 0

    def _first_hit(keywords, text):
        # First keyword contained in text, or None. Each family counts once.
        return next((kw for kw in keywords if kw in text), None)

    declared_sfr = deal.property_type == "sfr"
    no_year = _is_blank(deal.year_built)
    no_beds = _is_blank(deal.beds)

    warnings: list[str] = []
    score_land = 0
    score_commercial = 0
    score_mobile = 0

    # Missing/zero attributes (strong LAND signal)
    if no_year:
        warnings.append("year_built=0 (sin año de construcción) → sugiere LAND/lote vacante")
        score_land += 2
    if _is_blank(deal.sqft):
        warnings.append("sqft=0 (sin metros cuadrados habitables) → sugiere LAND")
        score_land += 2
    if no_beds and _is_blank(deal.baths):
        warnings.append("0 beds/0 baths → sugiere LAND o lote sin estructura habitable")
        score_land += 2
    if no_beds and no_year:
        # double signal — definitely not residential
        score_land += 1

    # Unusually low price for an SFR
    if deal.price is not None and deal.price < 20_000 and declared_sfr:
        warnings.append(f"Precio ${deal.price:,.0f} <$20K es atipico para SFR habitable. "
                        "Sugiere LAND, mobile home destruido, o property tax deed con strings.")
        score_land += 1
        score_mobile += 1

    # No rent (property produces no income — LAND or teardown)
    if _is_blank(deal.rent) and declared_sfr:
        warnings.append("rent=0 (sin estimacion de renta) → atipico para SFR. Posible LAND o needs full rehab.")
        score_land += 1

    # No ARV (never estimated; common in LAND deals)
    if _is_blank(deal.arv) and declared_sfr:
        warnings.append("arv=0 (sin ARV) → en SFR esto es raro, comun en LAND.")
        score_land += 0.5  # weak signal

    # Description keywords
    desc = (deal.listing_description or "").lower()
    if desc:
        hit = _first_hit(_LAND_KEYWORDS, desc)
        if hit is not None:
            warnings.append(f"Descripcion menciona '{hit}' → LAND")
            score_land += 2
        hit = _first_hit(_COMMERCIAL_KEYWORDS, desc)
        if hit is not None:
            warnings.append(f"Descripcion menciona '{hit}' → COMMERCIAL")
            score_commercial += 2
        hit = _first_hit(_MOBILE_HOME_KEYWORDS, desc)
        if hit is not None:
            warnings.append(f"Descripcion menciona '{hit}' → MOBILE HOME")
            score_mobile += 2

    # Pick the winner: a type is suggested only when it strictly beats both others.
    suggested_type: Optional[str] = None
    if score_land > score_commercial and score_land > score_mobile:
        suggested_type = "land"
    elif score_commercial > score_land and score_commercial > score_mobile:
        suggested_type = "commercial"
    elif score_mobile > score_land and score_mobile > score_commercial:
        suggested_type = "mobile_home"

    top_score = max(score_land, score_commercial, score_mobile)
    if top_score >= 4:
        confidence = "high"
    elif top_score >= 2:
        confidence = "medium"
    else:
        confidence = "low"

    is_mismatch = bool(suggested_type and suggested_type != deal.property_type)

    if not warnings:
        recommendation = "Inputs consistentes con property_type declarado. Procede."
    elif is_mismatch and confidence == "high":
        recommendation = (
            f"MISMATCH FUERTE: el usuario declaro '{deal.property_type}' pero los inputs "
            f"sugieren fuertemente '{suggested_type.upper()}'. El pipeline SFR no aplica. "
            f"Cambiar property_type a '{suggested_type}' y re-correr."
        )
    elif is_mismatch and confidence == "medium":
        recommendation = (
            f"POSIBLE MISMATCH: inputs sugieren '{suggested_type}'. "
            f"Validar manualmente antes de proceder."
        )
    else:
        # Warnings exist but the signal is not conclusive.
        recommendation = (
            "Algunos inputs son atipicos para SFR pero la senal no es concluyente. "
            "Revisar warnings."
        )

    return {
        "warnings": warnings,
        "suggested_type": suggested_type,
        "confidence": confidence,
        "is_mismatch": is_mismatch,
        "declared_type": deal.property_type,
        "score_land": score_land,
        "score_commercial": score_commercial,
        "score_mobile": score_mobile,
        "recommendation": recommendation,
        # Trigger condition: confidence == high AND mismatch → bypass SFR pipeline
        "should_bypass_sfr_pipeline": is_mismatch and confidence == "high",
    }
def _run_agent(
    model: str,
    prompt: str,
    images: Optional[list] = None,
    status_cb: StatusCallback = None,
) -> AgentResult:
    """Call one Ollama agent and wrap the outcome in an AgentResult.

    keep_alive is taken from KEEP_ALIVE_BY_MODE[RESOURCE_MODE]; when it is 0
    the success message notes that the model is being unloaded.

    Args:
        model: Ollama model name; also stored as AgentResult.agent.
        prompt: User message content.
        images: Optional list attached to the message under "images".
        status_cb: Optional progress callback.

    Returns:
        AgentResult with the response text, elapsed seconds and token count.
        Any exception from the call is caught and reported via
        AgentResult.error (output is "" in that case); nothing is raised.
    """
    keep_alive = KEEP_ALIVE_BY_MODE[RESOURCE_MODE]
    _emit(status_cb, f"Cargando **{model}**...")
    started = time.perf_counter()
    try:
        messages = [{"role": "user", "content": prompt}]
        if images:
            messages[0]["images"] = images
        _emit(status_cb, f"**{model}** procesando...")
        response = ollama.chat(
            model=model,
            messages=messages,
            keep_alive=keep_alive,
        )
        elapsed = time.perf_counter() - started
        unload_note = " Descargando de memoria..." if keep_alive == 0 else ""
        _emit(status_cb, f"OK - **{model}** termino en {elapsed:.1f}s.{unload_note}")
        return AgentResult(
            agent=model,
            # Content/eval_count may come back as None; callers interpolate
            # output into prompts and sum tokens, so normalize both here.
            output=response["message"]["content"] or "",
            seconds=elapsed,
            tokens=response.get("eval_count", 0) or 0,
        )
    except Exception as e:
        # Fail-soft boundary: one agent failing must not abort the pipeline.
        elapsed = time.perf_counter() - started
        _emit(status_cb, f"ERROR - **{model}** fallo: {e}")
        return AgentResult(
            agent=model,
            output="",
            seconds=elapsed,
            error=str(e),
        )
# ---------------------------------------------------------------------------
# PhotoInspector multi-foto (llama3.2-vision solo acepta 1 imagen por llamada)
# ---------------------------------------------------------------------------
def run_photo_inspector_multi(
    deal: DealInputs,
    photo_bytes_list: list,
    status_cb: StatusCallback,
) -> AgentResult:
    """
    Analyze photos ONE AT A TIME and fold the results into a single report.

    Each photo gets its own PhotoInspector call (one image per call) built
    with build_photo_prompt_single. Photos whose call fails are reported and
    skipped. Then:
      - no photos given        -> "No se subieron fotos." result
      - every photo failed     -> result with error="all_photos_failed"
      - exactly one succeeded  -> that analysis is returned as-is
      - two or more succeeded  -> one extra text-only call with
                                  build_photo_consolidation_prompt

    `seconds` adds up every call made (failed ones included); `tokens` adds
    up the successful per-photo calls plus the consolidation call. A failed
    consolidation is propagated through `error`.
    """
    agent_name = "PhotoInspector"
    if not photo_bytes_list:
        return AgentResult(agent=agent_name, output="No se subieron fotos.", seconds=0)

    photo_count = len(photo_bytes_list)
    reports: list[dict] = []
    elapsed = 0.0
    token_sum = 0
    _emit(status_cb, f"PhotoInspector: procesando {photo_count} foto(s) individualmente...")

    for idx, image in enumerate(photo_bytes_list, start=1):
        _emit(status_cb, f" foto {idx}/{photo_count}: analizando...")
        single_prompt = build_photo_prompt_single(deal, photo_index=idx, total_photos=photo_count)
        # Exactly one image per call; per-photo logging is silenced because
        # progress was already emitted above.
        outcome = _run_agent(agent_name, single_prompt, images=[image], status_cb=None)
        elapsed += outcome.seconds
        if outcome.error:
            _emit(status_cb, f" foto {idx}: fallo ({outcome.error}). Continuando con las demas.")
            continue
        reports.append(
            {
                "photo_index": idx,
                "analysis": outcome.output,
                "seconds": outcome.seconds,
                "tokens": outcome.tokens,
            }
        )
        token_sum += outcome.tokens

    if not reports:
        # Nothing usable: surface an aggregate error.
        return AgentResult(
            agent=agent_name,
            output="Todas las fotos fallaron en el analisis individual.",
            seconds=elapsed,
            tokens=token_sum,
            error="all_photos_failed",
        )

    if len(reports) == 1:
        # A single usable report needs no consolidation pass.
        _emit(status_cb, "PhotoInspector: solo 1 reporte util, sin consolidacion.")
        return AgentResult(
            agent=agent_name,
            output=reports[0]["analysis"],
            seconds=elapsed,
            tokens=token_sum,
        )

    # Text-only consolidation of all usable reports.
    _emit(status_cb, f"PhotoInspector: consolidando {len(reports)} analisis...")
    merged = _run_agent(
        agent_name,
        build_photo_consolidation_prompt(deal, reports),
        images=None,
        status_cb=None,
    )
    return AgentResult(
        agent=agent_name,
        output=merged.output,
        seconds=elapsed + merged.seconds,
        tokens=token_sum + merged.tokens,
        error=merged.error,  # propagate a consolidation failure
    )
# ---------------------------------------------------------------------------
# API publica
# ---------------------------------------------------------------------------
def analyze_deal(
deal: DealInputs,
profile: BuyerProfile,
photo_bytes: Optional[list] = None,
status_cb: StatusCallback = None,
) -> AnalysisResult:
"""
Ejecuta los 5 agentes EN SECUENCIA:
1. PhotoInspector (si no hay fotos, se omite y se usa rehab_override)
2. DealAnalyzer (usa rehab de PhotoInspector)
3. FloridaResearcher
4. LenderMatcher (usa estrategia ganadora de DealAnalyzer)
5. Coordinator (sintetiza los 4 outputs anteriores)
Cada agente se descarga de VRAM/RAM antes del siguiente (low_power mode).
Persiste el resultado como JSON en analyses/.
"""
started_at = datetime.now()
t0 = time.perf_counter()
# --- 0z. Property Type Detection (LAND bugfix) -------------------------
# CRITICAL: si los inputs sugieren LAND (year_built=0, sqft=0, etc) pero el
# usuario dijo SFR, los agentes financieros generan numeros invalidos (Cap
# Rate 151%, CoC 518%). Detectar ANTES de invocar agentes SFR-specific.
_emit(status_cb, "[0/9] Detectando tipo de propiedad (anomaly check)...")
property_type_warning = detect_property_type_anomalies(deal)
if property_type_warning["warnings"]:
_emit(status_cb, f" {len(property_type_warning['warnings'])} warning(s): "
f"suggested={property_type_warning.get('suggested_type')}, "
f"confidence={property_type_warning.get('confidence')}")
for w in property_type_warning["warnings"][:5]:
_emit(status_cb, f" - {w}")
# SI el detector dice high-confidence LAND + el user input dice SFR, BYPASS
# los agentes SFR. Correr solo FloridaResearcher + LAND-aware Coordinator +
# ContextualGlossaryAgent con disclaimer "TERRENO DETECTADO".
bypass_sfr = property_type_warning.get("should_bypass_sfr_pipeline", False)
# Tambien bypass si el usuario explicitamente declaro "land" / "commercial" / etc
if deal.property_type in ("land", "commercial", "mobile_home"):
bypass_sfr = True
_emit(status_cb, f" property_type='{deal.property_type}' (explicit) — bypassing SFR pipeline")
elif bypass_sfr:
_emit(status_cb,
f" PROPERTY TYPE MISMATCH: declared='{deal.property_type}', "
f"detected='{property_type_warning['suggested_type']}'. "
"Bypassing SFR-specific agents (DealAnalyzer/LenderMatcher/ValueEstimator/OfferStrategist).")
if bypass_sfr:
return _analyze_land_simplified(
deal=deal, profile=profile,
property_type_warning=property_type_warning,
started_at=started_at, t0=t0, status_cb=status_cb,
)
# --- 0a. Price Validation (Bug 2: discrepancy detection) ---------------
# FIRST step intencional: detectar listings >30% bajo/sobre market ANTES
# de quemar ciclos de fetchers + 8 LLM calls. Si CRITICAL_RED_FLAG,
# se inyecta el aviso a todos los prompts posteriores para que cada
# agente lo mencione en su seccion correspondiente.
# Fail-soft: si no hay fuentes (Firecrawl OFF + scraper pendiente),
# devuelve UNKNOWN — el analisis continua normal.
_emit(status_cb, "[1/9] Validando precio contra market estimates (red flag detection)...")
try:
# 1a pasada: solo con listing + neighborhood inferred (heuristica preliminar).
# 2da pasada (post-property_value) hace la validacion solida con tax_assessed/comps reales.
price_validation = validate_price(
address=deal.address,
listing_price=deal.price,
tax_assessed_value=None,
existing_comps_estimate=None,
neighborhood_class=None,
)
pv_status = price_validation.get("status", "UNKNOWN")
if pv_status == "CRITICAL_RED_FLAG":
disc = price_validation.get("signed_max_discrepancy_pct", 0)
_emit(status_cb, f" CRITICAL_RED_FLAG detectado: listing {disc:+.0f}% vs market. Analisis continua con flag activo.")
elif pv_status == "WARNING":
disc = price_validation.get("signed_max_discrepancy_pct", 0)
_emit(status_cb, f" WARNING: listing {disc:+.0f}% vs market (no critico).")
elif pv_status == "NORMAL":
_emit(status_cb, " OK Listing dentro de rango razonable de market estimates.")
else:
_emit(status_cb, " Price validation: UNKNOWN (sin fuentes disponibles — activar ENABLE_FIRECRAWL_PRICE_CHECK).")
except Exception as e:
_emit(status_cb, f" price_validator fallo: {e}. Continuamos sin validacion preliminar.")
price_validation = {"status": "UNKNOWN", "errors": [str(e)]}
# --- 0. Data fetchers (Wave 1) -----------------------------------------
# Datos reales de fuentes oficiales ANTES de los agentes Ollama.
# Fail-soft: si algo falla, se devuelve dict vacio en ese campo.
_emit(status_cb, "Obteniendo datos verificados de fuentes oficiales (Census/FEMA/HUD/NOAA)...")
try:
verified_data = fetch_all(deal, status_cb=status_cb)
except Exception as e:
_emit(status_cb, f"Data fetchers fallaron completo: {e}. Continuamos sin datos verificados.")
verified_data = {"geocode": {}, "flood": {}, "fmr": {}, "hurricanes": [], "fetch_errors": [str(e)]}
# --- 1. PhotoInspector -------------------------------------------------
# Multi-foto: llama3.2-vision en Ollama acepta solo 1 imagen por llamada.
# run_photo_inspector_multi procesa cada foto individualmente y consolida.
if photo_bytes:
photo = run_photo_inspector_multi(deal, photo_bytes, status_cb)
else:
_emit(status_cb, "PhotoInspector omitido (sin fotos).")
photo = AgentResult(
agent="PhotoInspector",
output=_no_photo_message(deal),
seconds=0,
)
# --- Pre-calculo: compute_all_scenarios() en Python ANTES del LLM ------
# Fix #3: el LLM se confunde con aritmetica. Pre-computamos exactos en Python
# y le pasamos el resultado como "DATOS CALCULADOS - no recalcules".
_emit(status_cb, "Calculando escenarios financieros en Python (no LLM)...")
rehab_for_calc = (
deal.rehab_override
if deal.rehab_override is not None
else _extract_rehab_estimate_from_output(photo.output) or 25_000
)
fmr_3br = ((verified_data or {}).get("fmr") or {}).get("fmr_3br")
flood_zone = ((verified_data or {}).get("flood") or {}).get("zone")
county_name = ((verified_data or {}).get("geocode") or {}).get("county_name")
# Wave 1.5A v1.2: surviving_debt del court_records (si court_records detecto liens
# heredables). Si esta vacio o el scraper esta deferred, surviving_debt=0 y el MAB
# queda igual al original.
court_records_data = (verified_data or {}).get("court_records") or {}
surviving_debt = court_records_data.get("total_surviving_debt", 0) or 0
if surviving_debt > 0:
_emit(status_cb,
f" Court records detecto ${surviving_debt:,.0f} en liens heredables — "
"ajustando effective_MAB.")
try:
computed = compute_all_scenarios(
price=deal.price,
rent_monthly=deal.rent,
property_tax_annual=deal.property_tax,
insurance_annual=deal.insurance,
hoa_monthly=deal.hoa,
arv=deal.arv,
rehab=rehab_for_calc,
fmr_3br=fmr_3br,
flood_zone=flood_zone,
county_name=county_name,
deal_type=deal.deal_type,
surviving_debt=surviving_debt, # Wave 1.5A v1.2
)
calculated_block = build_calculated_block(computed)
_emit(status_cb, f" Mejor estrategia calculada: {computed['best_strategy']}")
except Exception as e:
_emit(status_cb, f" finance_calculator fallo: {e}. Continuamos sin pre-calculo.")
computed = {}
calculated_block = ""
# --- 2. DealAnalyzer (con datos verificados + pre-calculos + red flag) ----
deal_analysis = _run_agent(
"DealAnalyzer",
build_deal_prompt(deal, photo.output, verified_data=verified_data,
calculated_block=calculated_block,
price_validation=price_validation),
status_cb=status_cb,
)
# --- 3. FloridaResearcher (con datos verificados + red flag) -----------
research = _run_agent(
"FloridaResearcher",
build_research_prompt(deal, verified_data=verified_data,
price_validation=price_validation),
status_cb=status_cb,
)
# --- 4. LenderMatcher (con red flag + court records v1.2) -------------
lender = _run_agent(
"LenderMatcher",
build_lender_prompt(deal, profile, deal_analysis.output,
price_validation=price_validation,
verified_data=verified_data),
status_cb=status_cb,
)
# --- 5. Coordinator (sintesis final con datos verificados + red flag) --
final = _run_agent(
"Coordinator",
build_synthesis_prompt(
deal,
photo.output,
deal_analysis.output,
research.output,
lender.output,
verified_data=verified_data,
price_validation=price_validation,
),
status_cb=status_cb,
)
# --- 6. property_value fetcher (Wave 2): tax assessed + comps + deductions ---
_emit(status_cb, "Estimando valor real (tax assessed + comps + deductions)...")
zip_code = None
try:
# Best-effort parse del ZIP del address (ultimos 5 digitos)
import re as _re
zm = _re.search(r"\b(\d{5})\b", deal.address)
if zm:
zip_code = zm.group(1)
except Exception:
pass
geo = (verified_data or {}).get("geocode") or {}
# Bug fix 2026-05-15: passar listing_description + condition_status +
# features_special al deduction calculator para evitar deducciones ciegas
# cuando el listing dice "Updated/Remodeled" o "BRAND NEW ROOF".
deal_description = (getattr(deal, "listing_description", "") or "")
deal_condition = (getattr(deal, "condition_status", "") or "")
deal_features_special = list(getattr(deal, "features_special", None) or [])
home_status = ""
active_under_contract = False
zillow_detail_credits = 0
# OPT-IN: enriquecer Zillow MLS deals con property-detail scrape (1 credit).
# Sin esto, listing_description es solo "Badges: ... | Source: Zillow MLS"
# que NO incluye "Updated/Remodeled" ni features. Habilitar con env var
# ENABLE_ZILLOW_DETAIL_ENRICHMENT=true (default false para no quemar creditos).
source_id = (getattr(deal, "source", "") or "").lower()
source_url = (getattr(deal, "source_url", "") or "")
is_zillow_mls = source_id == "zillow" or "zillow.com" in source_url.lower()
enrich_enabled = os.getenv("ENABLE_ZILLOW_DETAIL_ENRICHMENT", "false").lower() == "true"
if is_zillow_mls and source_url and enrich_enabled:
try:
from scrapers.zillow import scrape_zillow_property_detail
_emit(status_cb, f" Zillow detail enrichment: fetching {source_url[:80]}...")
detail, zillow_detail_credits = scrape_zillow_property_detail(
source_url, status_cb=status_cb,
)
if detail.get("description"):
deal_description = detail["description"]
if detail.get("condition_status"):
deal_condition = detail["condition_status"]
if detail.get("features_special"):
deal_features_special = detail["features_special"]
home_status = detail.get("home_status", "") or ""
active_under_contract = bool(detail.get("active_under_contract"))
if active_under_contract:
_emit(status_cb, f" WARNING: home_status={home_status!r} — competidor activo")
_emit(status_cb, f" detail: condition={detail.get('condition_status')!r} "
f"year={detail.get('year_built')} features={len(detail.get('features_special',[]))} "
f"reno_kws={len(detail.get('renovation_keywords_found',[]))}")
except Exception as e:
_emit(status_cb, f" zillow detail enrich fallo: {e}")
try:
property_value_data = fetch_property_value(
address=deal.address,
listing_price=deal.price,
sqft=deal.sqft,
beds=deal.beds,
baths=deal.baths,
year_built=deal.year_built,
zip_code=zip_code or geo.get("zip"),
county_name=geo.get("county_name"),
state=geo.get("state"),
photo_findings_text=photo.output,
listing_description=deal_description,
condition_status=deal_condition,
features_special=deal_features_special,
)
except Exception as e:
_emit(status_cb, f"property_value fetcher fallo: {e}")
property_value_data = {"fetch_errors": [str(e)]}
# Bug fix 2026-05-15: Inyectar home_status + active_under_contract en
# property_value_data para que ValueEstimator y el resto del pipeline lo
# vean. Active Under Contract = competidor ya en juego = red_flag.
if isinstance(property_value_data, dict):
if home_status:
property_value_data["home_status"] = home_status
if active_under_contract:
property_value_data["active_under_contract"] = True
# Add as deducciones-level metadata for the ValueEstimator prompt
ded = property_value_data.get("deductions") or {}
if isinstance(ded, dict):
existing_warnings = ded.get("_warnings") or []
if "active_under_contract" not in existing_warnings:
existing_warnings.append("active_under_contract")
ded["_warnings"] = existing_warnings
property_value_data["deductions"] = ded
# Track Firecrawl credits we spent enriching from detail page
if zillow_detail_credits:
property_value_data["zillow_detail_credits_used"] = zillow_detail_credits
property_value_data.setdefault("sources_used", []).append(
f"Zillow property detail enrichment ({zillow_detail_credits} credit)"
)
# Surface the enriched fields explicitly
if deal_condition:
property_value_data["enriched_condition_status"] = deal_condition
if deal_features_special:
property_value_data["enriched_features_special"] = deal_features_special
# --- 6b. Re-validar precio con tax_assessed + comps recien obtenidos ---
# Bug 2: si la 1a pasada quedo en UNKNOWN (Firecrawl OFF), ahora tenemos
# tax_assessed_value y comps_mid via property_value_data. Re-correr el
# validator nos da un veredicto solido aun sin Firecrawl.
# Wave 1.5A: si court_records detectó lis pendens, UPGRADE price_validation
# a CONFIRMED_DISTRESSED (mas fuerte que CRITICAL_RED_FLAG porque ya no es
# hipotesis, es hecho judicial publicamente verificable).
court = (verified_data or {}).get("court_records") or {}
if court.get("status") == "LIS_PENDENS_ACTIVE":
_emit(status_cb, f" CONFIRMED_DISTRESSED: lis pendens activo "
f"({court.get('lis_pendens_count', 0)} caso(s)) en Duval clerk records.")
price_validation = {
**price_validation,
"status": "CONFIRMED_DISTRESSED",
"court_records": court,
"deal_type_mismatch": deal.deal_type not in ("auction", "foreclosure", "tax_deed", "reo"),
"recommendation": (
f"LIS PENDENS ACTIVO CONFIRMADO en Duval clerk records "
f"({court.get('lis_pendens_count')} caso(s)). El owner '{court.get('owner_name')}' "
f"tiene foreclosure pendiente. El deal_type ingresado por el usuario "
f"('{deal.deal_type}') NO refleja esta realidad — el sistema sugiere "
"tratar como foreclosure auction (recalcular como MAB). NO ofertar como "
"MLS normal — la propiedad puede tener liens, code violations, y otros "
"issues heredables. Court records confirman lo que el price_validator "
"habia hipotetizado."
),
}
if price_validation.get("status") == "UNKNOWN" and isinstance(property_value_data, dict):
tax_av = property_value_data.get("tax_assessed_value")
ev = property_value_data.get("estimated_value") or {}
est = ev.get("mid")
est_conf = ev.get("confidence")
est_sources = property_value_data.get("sources_used") or []
nbh_class = ((verified_data or {}).get("neighborhood") or {}).get("neighborhood_class")
if tax_av or est:
try:
_emit(status_cb, " Re-validando precio con tax assessed + comps...")
price_validation = validate_price(
address=deal.address,
listing_price=deal.price,
tax_assessed_value=tax_av,
existing_comps_estimate=est,
existing_comps_confidence=est_conf, # Bug 4: reject low-confidence
existing_comps_sources=est_sources, # Bug 4: reject deductions-only
neighborhood_class=nbh_class, # Bug 6: distressed hypothesis
)
pv_status = price_validation.get("status", "UNKNOWN")
if pv_status == "CRITICAL_RED_FLAG":
disc = price_validation.get("signed_max_discrepancy_pct", 0)
_emit(status_cb, f" CRITICAL_RED_FLAG (post-property_value): {disc:+.0f}%.")
elif pv_status == "WARNING":
disc = price_validation.get("signed_max_discrepancy_pct", 0)
_emit(status_cb, f" WARNING post-property_value: {disc:+.0f}%.")
elif pv_status == "NORMAL":
_emit(status_cb, " OK Re-validacion NORMAL.")
except Exception as e:
_emit(status_cb, f" Re-validacion fallo: {e}")
# --- 7. ValueEstimator (Wave 2): interpreta el property_value_data ----
value_estimate = _run_agent(
"ValueEstimator",
build_value_prompt(deal, property_value_data, verified_data=verified_data,
price_validation=price_validation),
status_cb=status_cb,
)
# --- 8. OfferStrategist (Wave 2): genera Strike/Stretch/Walk-Away ----
offer_strategy = _run_agent(
"OfferStrategist",
build_offer_prompt(
deal=deal,
profile=profile,
value_estimate_output=value_estimate.output,
deal_analysis_output=deal_analysis.output,
verified_data=verified_data,
computed_scenarios=computed if isinstance(computed, dict) else None,
price_validation=price_validation,
),
status_cb=status_cb,
)
# --- 9. ContextualGlossaryAgent (briefing ejecutivo espanol + USA context) ---
briefing = _run_agent(
"ContextualGlossaryAgent",
build_glossary_prompt(
deal=deal,
profile=profile,
tranchi_output=final.output,
deal_analysis_output=deal_analysis.output,
research_output=research.output,
lender_output=lender.output,
verified_data=verified_data,
value_estimate_output=value_estimate.output,
offer_strategy_output=offer_strategy.output,
price_validation=price_validation,
),
status_cb=status_cb,
)
finished_at = datetime.now()
total = time.perf_counter() - t0
result = AnalysisResult(
deal=asdict(deal),
profile=asdict(profile),
photo=asdict(photo),
deal_analysis=asdict(deal_analysis),
research=asdict(research),
lender=asdict(lender),
final=asdict(final),
started_at=started_at.isoformat(),
finished_at=finished_at.isoformat(),
total_seconds=total,
verified_data=verified_data,
executive_briefing=asdict(briefing),
computed_scenarios=computed if isinstance(computed, dict) else {},
property_value_data=property_value_data,
value_estimate=asdict(value_estimate),
offer_strategy=asdict(offer_strategy),
price_validation=price_validation if isinstance(price_validation, dict) else {},
property_type_warning=property_type_warning,
pipeline_mode="full_sfr",
)
# Persistir como JSON con timestamp + direccion
out_dir = Path(ANALYSES_DIR)
out_dir.mkdir(parents=True, exist_ok=True)
stamp = started_at.strftime("%Y%m%d_%H%M%S")
safe_addr = "".join(c if c.isalnum() else "_" for c in deal.address)[:50]
out_file = out_dir / f"{stamp}_{safe_addr}.json"
out_file.write_text(
json.dumps(asdict(result), ensure_ascii=False, indent=2),
encoding="utf-8",
)
_emit(status_cb, f"Analisis guardado: `{out_file.name}` ({total:.0f}s total)")
return result
def compose_email(
    email_type: int,
    deal: DealInputs,
    profile: BuyerProfile,
    last_analysis: Optional[AnalysisResult],
    recipient: dict,
    language: str = "english",
    urgency: str = "medium",
    status_cb: StatusCallback = None,
) -> AgentResult:
    """Run the EmailComposer agent on demand (after an analysis).

    Every argument except ``status_cb`` is forwarded by keyword to
    ``build_email_prompt``. The prompt it returns is then handed to
    ``_run_agent`` under the agent name ``"EmailComposer"``, with
    ``status_cb`` passed through unchanged.

    Returns:
        Whatever ``_run_agent`` returns for the "EmailComposer" run.
    """
    # Gather the prompt inputs once; they are passed by keyword so the call
    # does not depend on the positional order of build_email_prompt.
    prompt_kwargs = {
        "email_type": email_type,
        "deal": deal,
        "profile": profile,
        "last_analysis": last_analysis,
        "recipient": recipient,
        "language": language,
        "urgency": urgency,
    }
    email_prompt = build_email_prompt(**prompt_kwargs)
    agent_result = _run_agent("EmailComposer", email_prompt, status_cb=status_cb)
    return agent_result