# AR-House/scripts/test_jacksonville_bugfixes.py

"""End-to-end test of the AR-House pipeline using the Jacksonville $70K scenario.

Checks that the 3 bugs are fixed:
- Bug 1: exhaustive outputs (>= 400 words per technical agent)
- Bug 2: CRITICAL_RED_FLAG detected + injected into every agent
- Bug 3: anomalies detected (Cap Rate >12%, etc.) + DealAnalyzer includes the
        "Validacion de Inputs Requerida" section

Runs the 8 Ollama agents in sequence (~5-8 min) and prints status lines to
stdout. At the end it evaluates each check and prints "PASS/FAIL" for each bug.

NOTE(review): the full JSON is expected to end up under analyses/, but this
script does not write it itself; presumably the pipeline does. Confirm.
"""
from __future__ import annotations

import io
import json
import sys
import time
from pathlib import Path

# Force UTF-8 on stdout for Windows (emojis in the logs otherwise crash under cp1252).
# reconfigure() changes the encoding of the existing stream in place, so no second
# TextIOWrapper is stacked on the same byte buffer. Streams that offer neither
# reconfigure() nor .buffer are left untouched instead of raising AttributeError.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    # Fallback for stream objects without reconfigure() that still expose a byte buffer.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# The project import below must come after the stdout fix and this sys.path tweak.
# ROOT is the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from orchestrator import DealInputs, BuyerProfile, analyze_deal  # noqa: E402


def status_cb(msg: str) -> None:
    """Write *msg* to stdout prefixed with the current local time as [HH:MM:SS].

    The line is flushed immediately so progress shows up while the run is
    still in flight.
    """
    stamp = time.strftime("%H:%M:%S")
    line = f"[{stamp}] {msg}"
    print(line, flush=True)


def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*.

    Falsy input (an empty string or ``None``) counts as zero words.
    """
    return len(text.split()) if text else 0


def _agent_output(section) -> str:
    """Return the "output" text of an agent-result dict, or "" when it is missing or None."""
    return (section or {}).get("output", "") or ""


def _strip_accents(text: str) -> str:
    """Return *text* with combining accent marks removed; letter case is preserved."""
    import unicodedata  # local import: only this helper needs it

    decomposed = unicodedata.normalize("NFD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def main() -> int:
    """Run the Jacksonville $70K scenario through the pipeline and report every check.

    Builds the deal and buyer profile, calls ``analyze_deal``, then prints the
    Wave 1.5A court-records flow, the Bug 2 / Bug 3 / Bug 1 checks, a global
    summary, and short excerpts of three agent outputs.

    Returns:
        0 when every check that the summary reports as a hard failure (Bug 2
        detection, Bug 3 Python detection, Bug 3 validation section) passes,
        1 otherwise. Bug 1 and the Wave 1.5A flow are reported as warnings
        only and do not affect the return value.
    """
    heavy_rule = "=" * 70
    light_rule = "─" * 70

    print(heavy_rule)
    print("AR-House — End-to-end test: Jacksonville $70K (Bug 1+2+3)")
    print(heavy_rule)

    # Classic "too good to be true" scenario with a REAL ADDRESS:
    # 5005 N Pearl St (Duval PA confirmed: owner JONES JOHN N, RE# 027301-0000).
    # Simulated $70K price + $1,500/mo rent -> triggers suspicious_low + anomalies.
    # With ENABLE_COURT_RECORDS=true, court_records should populate owner_name.
    deal = DealInputs(
        address="5005 N Pearl St, Jacksonville, FL 32206",
        price=70_000,
        rent=1_500,
        property_tax=2_000,
        insurance=1_800,
        hoa=0,
        sqft=1_200,
        beds=3,
        baths=2.0,
        year_built=1985,
        arv=180_000,
        rehab_override=25_000,    # skipping PhotoInspector
        deal_type="mls",
    )
    profile = BuyerProfile(
        profile_class="C",
        fico=720,
        capital_available=50_000,
        nationality="Argentina",
    )

    print(f"\nDEAL: {deal.address}")
    print(f"  price={deal.price:,} rent={deal.rent:,}/mo arv={deal.arv:,}")
    print(f"  year_built={deal.year_built}, rehab_override={deal.rehab_override:,}")
    print()

    t0 = time.perf_counter()
    result = analyze_deal(deal, profile, photo_bytes=None, status_cb=status_cb)
    elapsed = time.perf_counter() - t0

    print()
    print(heavy_rule)
    print(f"ANALISIS COMPLETADO en {elapsed:.0f}s")
    print(heavy_rule)

    # ------------------------------------------------------------------
    # Automatic validations
    # ------------------------------------------------------------------
    # Per the original author's note, analyze_deal returns an AnalysisResult
    # dataclass whose inner agent results are plain dicts, hence the [key]/.get
    # access. Every lookup tolerates a missing or None section.
    court = (result.verified_data or {}).get("court_records") or {}
    pv = result.price_validation or {}
    anomalies = (result.computed_scenarios or {}).get("anomalies") or {}

    # Insertion order here drives the order of every per-agent report below.
    outputs = {
        "DealAnalyzer": _agent_output(result.deal_analysis),
        "FloridaResearcher": _agent_output(result.research),
        "LenderMatcher": _agent_output(result.lender),
        "Coordinator": _agent_output(result.final),
        "ValueEstimator": _agent_output(result.value_estimate),
        "OfferStrategist": _agent_output(result.offer_strategy),
        "ContextualGlossaryAgent": _agent_output(result.executive_briefing),
    }
    briefing = outputs["ContextualGlossaryAgent"]

    # Accent-stripped views so that keyword checks match both "anomalo" and
    # "anómalo", "validacion" and "validación", etc.
    plain = {agent: _strip_accents(text) for agent, text in outputs.items()}
    folded = {agent: text.lower() for agent, text in plain.items()}

    # -- Wave 1.5A: court records flow ---------------------------------
    print()
    print(light_rule)
    print("WAVE 1.5A — Court Records Flow")
    print(light_rule)
    print(f"  court_records.status: {court.get('status')}")
    print(f"  court_records.county: {court.get('county')}")
    print(f"  court_records.owner_name: {court.get('owner_name')}")
    print(f"  court_records.re_number: {court.get('re_number')}")
    print(f"  court_records.lis_pendens_count: {court.get('lis_pendens_count', 0)}")
    print(f"  sources_used: {court.get('sources_used', [])}")

    owner = court.get('owner_name') or ''
    # An empty owner never counts as mentioned ('' is a substring of everything).
    owner_mentions = {
        agent: bool(owner) and owner in text for agent, text in outputs.items()
    }
    print(f"  Owner name '{owner}' mentions in agent outputs:")
    for agent, mentioned in owner_mentions.items():
        print(f"    {agent}: {'✅' if mentioned else '⚠️'}")
    wave15a_pass = (
        court.get('status') in ('OWNER_VERIFIED', 'LIS_PENDENS_ACTIVE')
        and bool(owner)
        and sum(owner_mentions.values()) >= 2
    )
    print(f"  → Wave 1.5A flow: {'✅ PASS' if wave15a_pass else '⚠️ PARTIAL/FAIL'}")

    # -- Bug 2: price discrepancy detection ----------------------------
    print()
    print(light_rule)
    print("BUG 2 — Price Discrepancy Detection")
    print(light_rule)
    pv_status = pv.get("status")
    pv_disc = pv.get("signed_max_discrepancy_pct")
    print(f"  price_validation.status: {pv_status}")
    print(f"  signed_max_discrepancy_pct: {pv_disc}")
    print(f"  sources_used: {pv.get('sources_used', [])}")
    bug2_pass = pv_status in ("CRITICAL_RED_FLAG", "WARNING")
    print(f"  → Bug 2 detection: {'✅ PASS' if bug2_pass else '❌ FAIL (status not flagging)'}")

    # Was the red flag injected into the prompts? Checked by its effect: do the
    # agents mention it in their outputs? Keywords are matched against the
    # lowercased, accent-stripped text.
    flag_keywords = {
        "DealAnalyzer": ("red flag", "precio anomalo", "anomalo", "investigacion", "due diligence"),
        "FloridaResearcher": ("red flag", "anomalo", "discrepancia"),
        "LenderMatcher": ("red flag", "anomalo", "validar"),
        "Coordinator": ("red flag", "anomalo", "alerta"),
        "ValueEstimator": ("red flag", "anomalo", "discrepancia"),
        "OfferStrategist": ("red flag", "anomalo"),
        "ContextualGlossaryAgent": ("alerta", "precio anomalo"),
    }
    flag_mentions = {
        agent: any(keyword in folded[agent] for keyword in keywords)
        for agent, keywords in flag_keywords.items()
    }
    # The briefing also counts the alert emoji and an upper-case "ANOMALO"
    # (case-sensitive on purpose, matching the original check).
    if "🚨" in briefing or "ANOMALO" in plain["ContextualGlossaryAgent"]:
        flag_mentions["ContextualGlossaryAgent"] = True
    print("  Mencion del red flag en outputs:")
    for agent, mentioned in flag_mentions.items():
        print(f"    {agent}: {'✅' if mentioned else '⚠️'}")

    # -- Bug 3: anomaly detection --------------------------------------
    print()
    print(light_rule)
    print("BUG 3 — Anomaly Detection")
    print(light_rule)
    print(f"  has_anomalies: {anomalies.get('has_anomalies')}")
    print(f"  is_critical: {anomalies.get('is_critical')}")
    print(f"  count: {anomalies.get('anomaly_count')} "
          f"(HIGH={anomalies.get('high_severity_count')}, MEDIUM={anomalies.get('medium_severity_count')})")
    flagged = anomalies.get("flagged_metrics")
    if flagged:
        print("  flagged_metrics:")
        for item in flagged:
            # .get keeps the report going even if an entry lacks one of the keys.
            print(f"    - {item.get('scenario')} / {item.get('metric')} = "
                  f"{item.get('value')} ({item.get('severity')})")
    bug3_python_pass = bool(anomalies.get("has_anomalies", False))
    print(f"  → Bug 3 Python detection: {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")

    # Did DealAnalyzer include the mandatory "Validacion de Inputs" section?
    deal_an_folded = folded["DealAnalyzer"]
    has_validation_section = (
        "validacion de inputs" in deal_an_folded
        or "validar inputs" in deal_an_folded
    )
    print(f"  DealAnalyzer incluye '## ⚠️ Validacion de Inputs Requerida': "
          f"{'✅ PASS' if has_validation_section else '❌ FAIL — modelo se la salteo'}")

    # -- Bug 1: exhaustiveness -----------------------------------------
    print()
    print(light_rule)
    print("BUG 1 — Exhaustividad (mínimo 400 palabras por agente técnico)")
    print(light_rule)
    min_words = 400
    # The briefing agent is allowed to be shorter, so it is excluded here.
    word_counts = {
        agent: count_words(text)
        for agent, text in outputs.items()
        if agent != "ContextualGlossaryAgent"
    }
    for agent, wc in word_counts.items():
        symbol = "✅" if wc >= min_words else "⚠️"
        print(f"  {agent}: {wc} palabras {symbol}")
    bug1_pass = all(wc >= min_words for wc in word_counts.values())
    print(f"  ContextualGlossaryAgent (briefing — sin minimo): {count_words(briefing)} palabras")
    print(f"  → Bug 1 exhaustividad: "
          f"{'✅ PASS' if bug1_pass else '⚠️ PARTIAL (algunos agentes <400)'}")

    # -- Global summary ------------------------------------------------
    print()
    print(heavy_rule)
    print("RESUMEN GLOBAL")
    print(heavy_rule)
    print(f"  Bug 1 (exhaustividad): {'✅ PASS' if bug1_pass else '⚠️ PARTIAL'}")
    print(f"  Bug 2 (price red flag): {'✅ PASS' if bug2_pass else '❌ FAIL'}")
    print(f"  Bug 3 (anomalias): {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")
    print(f"  Bug 3 (LLM secciona): {'✅ PASS' if has_validation_section else '❌ FAIL'}")
    print(f"  Total: {elapsed:.0f}s")

    # Excerpt of selected agents for human eyeballing.
    print()
    print(heavy_rule)
    print("EXCERPTS (primeras 500 chars de cada agente)")
    print(heavy_rule)
    for name in ("DealAnalyzer", "Coordinator", "ContextualGlossaryAgent"):
        text = outputs[name]
        print(f"\n─── {name} ───")
        print(text[:500] + ("..." if len(text) > 500 else ""))

    # Exit status mirrors the checks that the summary marks as hard failures,
    # so a failing run is visible to whatever invoked the script.
    hard_checks_pass = bug2_pass and bug3_python_pass and has_validation_section
    return 0 if hard_checks_pass else 1


# Script entry point: run the end-to-end check and use main()'s return value
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())