Files
AR-House/scripts/test_jacksonville_bugfixes.py
T
2026-07-03 12:24:58 -04:00

246 lines
10 KiB
Python

"""Test end-to-end del pipeline AR-House con el escenario Jacksonville $70K.
Checks that the 3 bugs are fixed:
- Bug 1: exhaustive outputs (>= 400 words per technical agent)
- Bug 2: CRITICAL_RED_FLAG detected + injected into every agent
- Bug 3: anomalies detected (Cap Rate >12%, etc.) + DealAnalyzer includes
the "Validacion de Inputs Requerida" section
Runs the 8 Ollama agents in sequence (~5-8 min). Prints status to stdout;
the full JSON is expected to be saved under analyses/ by the pipeline (this
script does not write it itself). At the end it evaluates each check and
prints "PASS/FAIL" for each bug.
"""
from __future__ import annotations
import io
import json
import sys
import time
from pathlib import Path
# Force UTF-8 on stdout for Windows: emojis in the status logs otherwise crash
# the default cp1252 console encoding. Reconfigure the existing stream in place
# (TextIOWrapper.reconfigure, Python 3.7+) so only one text wrapper ever sits on
# top of the underlying buffer. Fall back to re-wrapping only when reconfigure()
# is unavailable, and leave stdout untouched when it exposes no binary buffer
# (e.g. it has been replaced by an in-memory stream).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Project imports come after the stdout fix. ROOT is the directory two levels
# above this file; it is put first on sys.path so that `orchestrator`
# (presumably located there) can be imported.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from orchestrator import DealInputs, BuyerProfile, analyze_deal  # noqa: E402
def status_cb(msg: str) -> None:
    """Print *msg* prefixed with the current local time as ``[HH:MM:SS]``.

    The line is flushed immediately so progress shows up while the run is
    still in flight.
    """
    stamp = time.strftime("%H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)
def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*.

    Any falsy value (empty string, ``None``) counts as zero words.
    """
    return len(text.split()) if text else 0
def _strip_accents(text: str) -> str:
    """Return *text* with combining diacritics removed ("anómalo" -> "anomalo")."""
    import unicodedata  # function-scope import: not part of the file's import header

    decomposed = unicodedata.normalize("NFD", text or "")
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _fold(text: str) -> str:
    """Return *text* accent-stripped and lowercased, for tolerant substring search."""
    return _strip_accents(text).lower()


def _agent_output(section) -> str:
    """Return the ``"output"`` text of an agent result dict, or ``""`` if absent."""
    return (section or {}).get("output", "") or ""


def main() -> int:
    """Run the Jacksonville $70K scenario through ``analyze_deal`` and report on each bug.

    Prints a per-check report (Wave 1.5A court records, Bug 2 price flag,
    Bug 3 anomalies, Bug 1 word counts), a global summary and short excerpts.

    Returns:
        0 when every hard check passes (the ones reported as "❌ FAIL" on
        failure: Bug 2 status flag, Bug 3 anomaly detection, Bug 3 validation
        section); 1 otherwise. Checks reported with "⚠️" (Bug 1 word counts,
        Wave 1.5A flow, per-agent mentions) are informational and do not
        affect the exit code.
    """
    print("=" * 70)
    print("AR-House — End-to-end test: Jacksonville $70K (Bug 1+2+3)")
    print("=" * 70)

    # Classic "too good to be true" scenario using a REAL address:
    # 5005 N Pearl St (Duval PA confirmed: owner JONES JOHN N, RE# 027301-0000).
    # Simulated $70K price + $1,500/mo rent -> triggers suspicious_low + anomalies.
    # With ENABLE_COURT_RECORDS=true, court_records should populate owner_name.
    deal = DealInputs(
        address="5005 N Pearl St, Jacksonville, FL 32206",
        price=70_000,
        rent=1_500,
        property_tax=2_000,
        insurance=1_800,
        hoa=0,
        sqft=1_200,
        beds=3,
        baths=2.0,
        year_built=1985,
        arv=180_000,
        rehab_override=25_000,  # skipping PhotoInspector
        deal_type="mls",
    )
    profile = BuyerProfile(
        profile_class="C",
        fico=720,
        capital_available=50_000,
        nationality="Argentina",
    )
    print(f"\nDEAL: {deal.address}")
    print(f" price={deal.price:,} rent={deal.rent:,}/mo arv={deal.arv:,}")
    print(f" year_built={deal.year_built}, rehab_override={deal.rehab_override:,}")
    print()

    t0 = time.perf_counter()
    result = analyze_deal(deal, profile, photo_bytes=None, status_cb=status_cb)
    elapsed = time.perf_counter() - t0
    print()
    print("=" * 70)
    print(f"ANALISIS COMPLETADO en {elapsed:.0f}s")
    print("=" * 70)

    # ════════════════════════════════════════════════════════════
    # Automatic validations
    # ════════════════════════════════════════════════════════════
    # analyze_deal returns an AnalysisResult dataclass; the inner AgentResult
    # objects are converted to dicts by asdict() at construction, so they are
    # accessed by key. Every lookup tolerates a missing/None section.
    court = (result.verified_data or {}).get("court_records") or {}
    pv = result.price_validation or {}
    # `or {}` (not a .get default) so an explicit None value is tolerated too.
    anomalies = (result.computed_scenarios or {}).get("anomalies") or {}
    outputs = {
        "DealAnalyzer": _agent_output(result.deal_analysis),
        "FloridaResearcher": _agent_output(result.research),
        "LenderMatcher": _agent_output(result.lender),
        "Coordinator": _agent_output(result.final),
        "ValueEstimator": _agent_output(result.value_estimate),
        "OfferStrategist": _agent_output(result.offer_strategy),
        "ContextualGlossaryAgent": _agent_output(result.executive_briefing),
    }
    # Accent- and case-insensitive view of each output, computed once and
    # shared by all keyword searches below.
    folded = {agent: _fold(text) for agent, text in outputs.items()}
    deal_an = outputs["DealAnalyzer"]
    briefing = outputs["ContextualGlossaryAgent"]

    # ── Wave 1.5A: check court records flow ──
    print()
    print("─" * 70)
    print("WAVE 1.5A — Court Records Flow")
    print("─" * 70)
    print(f" court_records.status: {court.get('status')}")
    print(f" court_records.county: {court.get('county')}")
    print(f" court_records.owner_name: {court.get('owner_name')}")
    print(f" court_records.re_number: {court.get('re_number')}")
    print(f" court_records.lis_pendens_count: {court.get('lis_pendens_count', 0)}")
    print(f" sources_used: {court.get('sources_used', [])}")
    owner = court.get('owner_name') or ''
    # Match the owner name ignoring case/accents: the records value is
    # upper-case while agents may re-case it in prose.
    owner_key = _fold(owner).strip()
    owner_mentions = {
        agent: bool(owner_key) and owner_key in text
        for agent, text in folded.items()
    }
    print(f" Owner name '{owner}' mentions in agent outputs:")
    for agent, mentioned in owner_mentions.items():
        print(f" {agent}: {'✅' if mentioned else '⚠️'}")
    wave15a_pass = (
        court.get('status') in ('OWNER_VERIFIED', 'LIS_PENDENS_ACTIVE')
        and bool(owner)
        and sum(owner_mentions.values()) >= 2
    )
    print(f" → Wave 1.5A flow: {'✅ PASS' if wave15a_pass else '⚠️ PARTIAL/FAIL'}")

    # ── Bug 2: price discrepancy ──
    print()
    print("─" * 70)
    print("BUG 2 — Price Discrepancy Detection")
    print("─" * 70)
    pv_status = pv.get("status")
    pv_disc = pv.get("signed_max_discrepancy_pct")
    print(f" price_validation.status: {pv_status}")
    print(f" signed_max_discrepancy_pct: {pv_disc}")
    print(f" sources_used: {pv.get('sources_used', [])}")
    bug2_pass = pv_status in ("CRITICAL_RED_FLAG", "WARNING")
    print(f" → Bug 2 detection: {'✅ PASS' if bug2_pass else '❌ FAIL (status not flagging)'}")

    # Was the red flag injected into the prompts? Checked by its effect: do the
    # agents mention it in their outputs? Keywords are written pre-folded
    # (lower-case, no accents) because they are searched in the folded text.
    flag_keywords = {
        "DealAnalyzer": ("red flag", "anomalo", "investigacion", "due diligence"),
        "FloridaResearcher": ("red flag", "anomalo", "discrepancia"),
        "LenderMatcher": ("red flag", "anomalo", "validar"),
        "Coordinator": ("red flag", "anomalo", "alerta"),
        "ValueEstimator": ("red flag", "anomalo", "discrepancia"),
        "OfferStrategist": ("red flag", "anomalo"),
        "ContextualGlossaryAgent": ("alerta", "🚨", "precio anomalo"),
    }
    flag_mentions = {
        agent: any(keyword in folded[agent] for keyword in keywords)
        for agent, keywords in flag_keywords.items()
    }
    # The briefing also counts an upper-case "ANOMALO" banner (case-sensitive,
    # but tolerant of the accented spelling).
    if "ANOMALO" in _strip_accents(briefing):
        flag_mentions["ContextualGlossaryAgent"] = True
    print(" Mencion del red flag en outputs:")
    for agent, mentioned in flag_mentions.items():
        print(f" {agent}: {'✅' if mentioned else '⚠️'}")

    # ── Bug 3: anomaly detection ──
    print()
    print("─" * 70)
    print("BUG 3 — Anomaly Detection")
    print("─" * 70)
    print(f" has_anomalies: {anomalies.get('has_anomalies')}")
    print(f" is_critical: {anomalies.get('is_critical')}")
    print(f" count: {anomalies.get('anomaly_count')} "
          f"(HIGH={anomalies.get('high_severity_count')}, MEDIUM={anomalies.get('medium_severity_count')})")
    flagged_metrics = anomalies.get("flagged_metrics")
    if flagged_metrics:
        print(" flagged_metrics:")
        for item in flagged_metrics:
            # .get() so one partially populated entry cannot abort the report.
            print(f" - {item.get('scenario')} / {item.get('metric')} = "
                  f"{item.get('value')} ({item.get('severity')})")
    bug3_python_pass = bool(anomalies.get("has_anomalies", False))
    print(f" → Bug 3 Python detection: {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")

    # Did DealAnalyzer include the mandatory "Validacion de Inputs" section?
    folded_deal_an = folded["DealAnalyzer"]
    has_validation_section = (
        "validacion de inputs" in folded_deal_an
        or "validar inputs" in folded_deal_an
    )
    print(f" DealAnalyzer incluye '## ⚠️ Validacion de Inputs Requerida': "
          f"{'✅ PASS' if has_validation_section else '❌ FAIL — modelo se la salteo'}")

    # ── Bug 1: exhaustiveness ──
    print()
    print("─" * 70)
    print("BUG 1 — Exhaustividad (mínimo 400 palabras por agente técnico)")
    print("─" * 70)
    # The briefing agent is excluded: it is allowed to be shorter.
    word_counts = {
        agent: count_words(text)
        for agent, text in outputs.items()
        if agent != "ContextualGlossaryAgent"
    }
    for agent, wc in word_counts.items():
        symbol = "✅" if wc >= 400 else "⚠️"
        print(f" {agent}: {wc} palabras {symbol}")
    bug1_pass = all(wc >= 400 for wc in word_counts.values())
    print(f" ContextualGlossaryAgent (briefing — sin minimo): {count_words(briefing)} palabras")
    print(f" → Bug 1 exhaustividad: "
          f"{'✅ PASS' if bug1_pass else '⚠️ PARTIAL (algunos agentes <400)'}")

    # ── Global summary ──
    print()
    print("=" * 70)
    print("RESUMEN GLOBAL")
    print("=" * 70)
    print(f" Bug 1 (exhaustividad): {'✅ PASS' if bug1_pass else '⚠️ PARTIAL'}")
    print(f" Bug 2 (price red flag): {'✅ PASS' if bug2_pass else '❌ FAIL'}")
    print(f" Bug 3 (anomalias): {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")
    print(f" Bug 3 (LLM secciona): {'✅ PASS' if has_validation_section else '❌ FAIL'}")
    print(f" Total: {elapsed:.0f}s")

    # Excerpt of selected agents for human eyeballing.
    print()
    print("=" * 70)
    print("EXCERPTS (primeras 500 chars de cada agente)")
    print("=" * 70)
    for name in ("DealAnalyzer", "Coordinator", "ContextualGlossaryAgent"):
        text = outputs[name]
        print(f"\n─── {name} ───")
        print(text[:500] + ("..." if len(text) > 500 else ""))

    # Surface hard failures through the exit status so the result is usable
    # from scripts/CI instead of always reporting success.
    hard_checks_ok = bug2_pass and bug3_python_pass and has_validation_section
    return 0 if hard_checks_ok else 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())