"""End-to-end test of the AR-House pipeline using the Jacksonville $70K scenario.

Validates that the 3 bugs are fixed:
- Bug 1: exhaustive outputs (>= 400 words/section in the technical agents)
- Bug 2: CRITICAL_RED_FLAG detected + injected into every agent
- Bug 3: anomalies detected (Cap Rate >12%, etc.) + DealAnalyzer includes the
  "Validacion de Inputs Requerida" section

Runs the 8 Ollama agents in sequence (~5-8 min). Prints status to stdout; the
full JSON is saved in analyses/ (this script does not write it itself, so
presumably the pipeline does). At the end it runs the checks and prints
"PASS/FAIL" for each bug.
"""
from __future__ import annotations

import io
import json
import sys
import time
import unicodedata
from pathlib import Path

# Force UTF-8 on stdout for Windows: the emojis printed by this script cannot
# be encoded with the default cp1252 console encoding and would crash print().
# reconfigure() changes the encoding of the existing stream in place, so it
# also works when sys.stdout has no usable ``.buffer``; wrapping the raw buffer
# is kept only as a fallback for text streams that lack reconfigure().
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Project imports come after the stdout fix. ROOT is the directory two levels
# above this file; it is put first on sys.path so that ``orchestrator``
# (presumably located there) can be imported when running this file directly.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from orchestrator import DealInputs, BuyerProfile, analyze_deal  # noqa: E402


def status_cb(msg: str) -> None:
    """Print a status message prefixed with the current wall-clock time.

    The line has the form ``[HH:MM:SS] msg`` and is flushed immediately so the
    progress of a long run is visible as it happens.

    Args:
        msg: Text to report.
    """
    timestamp = time.strftime("%H:%M:%S")
    print(f"[{timestamp}] {msg}", flush=True)


def count_words(text: str) -> int:
    """Return the number of whitespace-separated words in *text*.

    Args:
        text: Text to measure. Any falsy value (``""`` or ``None``) counts as
            zero words.

    Returns:
        The length of ``text.split()``, or 0 for falsy input.
    """
    return len(text.split()) if text else 0


_RULE_WIDTH = 70  # width of the horizontal rules printed around section titles
_MIN_WORDS = 400  # minimum word count expected from each technical agent


def _strip_accents(text: str) -> str:
    """Return *text* with combining diacritics removed ("anómalo" -> "anomalo")."""
    decomposed = unicodedata.normalize("NFD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _mentions(text: str, *keywords: str) -> bool:
    """Return True if any keyword occurs in *text*, ignoring case and accents.

    Keywords must be given lowercase and unaccented.
    """
    haystack = _strip_accents(text).lower()
    return any(keyword in haystack for keyword in keywords)


def _agent_output(section) -> str:
    """Return the ``"output"`` text of an agent-result dict, or "" when missing."""
    return (section or {}).get("output", "") or ""


def _banner(title: str, rule_char: str) -> None:
    """Print *title* between two horizontal rules made of *rule_char*."""
    rule = rule_char * _RULE_WIDTH
    print(rule)
    print(title)
    print(rule)


def main() -> int:
    """Run the Jacksonville $70K scenario through the pipeline and report checks.

    Builds the deal and buyer profile, calls ``analyze_deal`` once, then prints
    a report with one section per check (Wave 1.5A court records, Bug 2 price
    red flag, Bug 3 anomalies, Bug 1 exhaustiveness), a global summary and
    500-character excerpts of selected agent outputs.

    Returns:
        Process exit code: 0 when every hard check passes (Bug 2 detection,
        Bug 3 Python detection and the DealAnalyzer validation section), 1
        otherwise. Bug 1 and Wave 1.5A are reported as PARTIAL in the output
        and do not affect the exit code.
    """
    _banner("AR-House — End-to-end test: Jacksonville $70K (Bug 1+2+3)", "=")

    # Classic "too good to be true" scenario using a REAL address:
    # 5005 N Pearl St (Duval PA confirmed: owner JONES JOHN N, RE# 027301-0000).
    # Simulated $70K price + $1,500/mo rent → triggers suspicious_low + anomalies.
    # With ENABLE_COURT_RECORDS=true, court_records should populate owner_name.
    deal = DealInputs(
        address="5005 N Pearl St, Jacksonville, FL 32206",
        price=70_000,
        rent=1_500,
        property_tax=2_000,
        insurance=1_800,
        hoa=0,
        sqft=1_200,
        beds=3,
        baths=2.0,
        year_built=1985,
        arv=180_000,
        rehab_override=25_000,  # skipping PhotoInspector
        deal_type="mls",
    )
    profile = BuyerProfile(
        profile_class="C",
        fico=720,
        capital_available=50_000,
        nationality="Argentina",
    )

    print(f"\nDEAL: {deal.address}")
    print(f" price={deal.price:,} rent={deal.rent:,}/mo arv={deal.arv:,}")
    print(f" year_built={deal.year_built}, rehab_override={deal.rehab_override:,}")
    print()

    t0 = time.perf_counter()
    result = analyze_deal(deal, profile, photo_bytes=None, status_cb=status_cb)
    elapsed = time.perf_counter() - t0

    print()
    _banner(f"ANALISIS COMPLETADO en {elapsed:.0f}s", "=")

    # ------------------------------------------------------------------
    # Automatic validations
    # ------------------------------------------------------------------
    # Per the original author's note, analyze_deal returns an AnalysisResult
    # dataclass whose inner AgentResult objects were converted to dicts by
    # asdict(), hence the key-based access below. Every lookup falls back to
    # an empty dict/string so a missing section is reported instead of crashing.
    court = (result.verified_data or {}).get("court_records") or {}
    pv = result.price_validation or {}
    anomalies = (result.computed_scenarios or {}).get("anomalies") or {}
    deal_an = _agent_output(result.deal_analysis)
    coord = _agent_output(result.final)
    research = _agent_output(result.research)
    lender = _agent_output(result.lender)
    value_est = _agent_output(result.value_estimate)
    offer_str = _agent_output(result.offer_strategy)
    briefing = _agent_output(result.executive_briefing)

    # Report order of the agents; dicts below rely on this insertion order.
    agent_outputs = {
        "DealAnalyzer": deal_an,
        "FloridaResearcher": research,
        "LenderMatcher": lender,
        "Coordinator": coord,
        "ValueEstimator": value_est,
        "OfferStrategist": offer_str,
        "ContextualGlossaryAgent": briefing,
    }

    # ── Wave 1.5A: court records flow ────────────────────────────────
    print()
    _banner("WAVE 1.5A — Court Records Flow", "─")
    print(f" court_records.status: {court.get('status')}")
    print(f" court_records.county: {court.get('county')}")
    print(f" court_records.owner_name: {court.get('owner_name')}")
    print(f" court_records.re_number: {court.get('re_number')}")
    print(f" court_records.lis_pendens_count: {court.get('lis_pendens_count', 0)}")
    print(f" sources_used: {court.get('sources_used', [])}")

    owner = court.get("owner_name") or ""
    # Case-insensitive: the registry name is upper-case, agent prose may not be.
    owner_key = owner.casefold()
    owner_mentions = {
        agent: bool(owner) and owner_key in text.casefold()
        for agent, text in agent_outputs.items()
    }
    print(f" Owner name '{owner}' mentions in agent outputs:")
    for agent, mentioned in owner_mentions.items():
        print(f" {agent}: {'✅' if mentioned else '⚠️'}")
    wave15a_pass = (
        court.get("status") in ("OWNER_VERIFIED", "LIS_PENDENS_ACTIVE")
        and bool(owner)
        and sum(owner_mentions.values()) >= 2
    )
    print(f" → Wave 1.5A flow: {'✅ PASS' if wave15a_pass else '⚠️ PARTIAL/FAIL'}")

    # ── Bug 2: price discrepancy detection ───────────────────────────
    print()
    _banner("BUG 2 — Price Discrepancy Detection", "─")
    pv_status = pv.get("status")
    pv_disc = pv.get("signed_max_discrepancy_pct")
    print(f" price_validation.status: {pv_status}")
    print(f" signed_max_discrepancy_pct: {pv_disc}")
    print(f" sources_used: {pv.get('sources_used', [])}")
    bug2_pass = pv_status in ("CRITICAL_RED_FLAG", "WARNING")
    print(f" → Bug 2 detection: {'✅ PASS' if bug2_pass else '❌ FAIL (status not flagging)'}")

    # Was the red flag injected into the prompts? Checked through its effect:
    # do the agents mention it in their outputs? Matching ignores case and
    # accents, so "anomalo" also covers "anómalo" and "precio anomalo".
    flag_keywords = {
        "DealAnalyzer": ("red flag", "anomalo", "investigacion", "due diligence"),
        "FloridaResearcher": ("red flag", "anomalo", "discrepancia"),
        "LenderMatcher": ("red flag", "anomalo", "validar"),
        "Coordinator": ("red flag", "anomalo", "alerta"),
        "ValueEstimator": ("red flag", "anomalo", "discrepancia"),
        "OfferStrategist": ("red flag", "anomalo"),
    }
    flag_mentions = {
        agent: _mentions(agent_outputs[agent], *keywords)
        for agent, keywords in flag_keywords.items()
    }
    # The briefing only counts the bare word when it is written in capitals.
    flag_mentions["ContextualGlossaryAgent"] = (
        _mentions(briefing, "alerta", "precio anomalo")
        or "🚨" in briefing
        or "ANOMALO" in _strip_accents(briefing)
    )
    print(" Mencion del red flag en outputs:")
    for agent, mentioned in flag_mentions.items():
        print(f" {agent}: {'✅' if mentioned else '⚠️'}")

    # ── Bug 3: anomaly detection ─────────────────────────────────────
    print()
    _banner("BUG 3 — Anomaly Detection", "─")
    print(f" has_anomalies: {anomalies.get('has_anomalies')}")
    print(f" is_critical: {anomalies.get('is_critical')}")
    print(f" count: {anomalies.get('anomaly_count')} "
          f"(HIGH={anomalies.get('high_severity_count')}, MEDIUM={anomalies.get('medium_severity_count')})")
    flagged = anomalies.get("flagged_metrics") or []
    if flagged:
        print(" flagged_metrics:")
        for metric in flagged:
            # .get() so a partially filled entry is still listed.
            print(f" - {metric.get('scenario')} / {metric.get('metric')} = "
                  f"{metric.get('value')} ({metric.get('severity')})")
    bug3_python_pass = bool(anomalies.get("has_anomalies"))
    print(f" → Bug 3 Python detection: {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")

    # Did DealAnalyzer include the mandatory "Validacion de Inputs" section?
    has_validation_section = _mentions(deal_an, "validacion de inputs", "validar inputs")
    print(f" DealAnalyzer incluye '## ⚠️ Validacion de Inputs Requerida': "
          f"{'✅ PASS' if has_validation_section else '❌ FAIL — modelo se la salteo'}")

    # ── Bug 1: exhaustiveness ────────────────────────────────────────
    print()
    _banner("BUG 1 — Exhaustividad (mínimo 400 palabras por agente técnico)", "─")
    # ContextualGlossaryAgent is excluded: the briefing is allowed to be shorter.
    word_counts = {
        agent: count_words(text)
        for agent, text in agent_outputs.items()
        if agent != "ContextualGlossaryAgent"
    }
    for agent, wc in word_counts.items():
        symbol = "✅" if wc >= _MIN_WORDS else "⚠️"
        print(f" {agent}: {wc} palabras {symbol}")
    bug1_pass = all(wc >= _MIN_WORDS for wc in word_counts.values())
    print(f" ContextualGlossaryAgent (briefing — sin minimo): {count_words(briefing)} palabras")
    print(f" → Bug 1 exhaustividad: "
          f"{'✅ PASS' if bug1_pass else '⚠️ PARTIAL (algunos agentes <400)'}")

    # ── Global summary ───────────────────────────────────────────────
    print()
    _banner("RESUMEN GLOBAL", "=")
    print(f" Bug 1 (exhaustividad): {'✅ PASS' if bug1_pass else '⚠️ PARTIAL'}")
    print(f" Bug 2 (price red flag): {'✅ PASS' if bug2_pass else '❌ FAIL'}")
    print(f" Bug 3 (anomalias): {'✅ PASS' if bug3_python_pass else '❌ FAIL'}")
    print(f" Bug 3 (LLM secciona): {'✅ PASS' if has_validation_section else '❌ FAIL'}")
    print(f" Total: {elapsed:.0f}s")

    # Excerpt of selected agents for human inspection.
    print()
    _banner("EXCERPTS (primeras 500 chars de cada agente)", "=")
    for name in ("DealAnalyzer", "Coordinator", "ContextualGlossaryAgent"):
        text = agent_outputs[name]
        print(f"\n─── {name} ───")
        print(text[:500] + ("..." if len(text) > 500 else ""))

    # Only the checks reported as hard FAIL drive the exit code.
    hard_checks = (bug2_pass, bug3_python_pass, has_validation_section)
    return 0 if all(hard_checks) else 1


# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())