# AR-House/property_researcher.py

"""property_researcher.py — Reasoning loop that autonomously researches property deals.

PRINCIPLE:
The local model (Ollama PropertyResearcher) decides which tools to call to
investigate a deal. The loop runs until the model calls `finish`, stops
requesting tools, or the maximum number of iterations is reached.

USAGE:
    from property_researcher import research_deal
    result = research_deal(
        deal_id=349,
        max_iterations=15,
        status_cb=print,
    )
    # → {deal_id, summary, portals_used, documents_saved, findings, iterations,
    #    elapsed_seconds, errors, finished_cleanly, tool_calls_log}
"""
from __future__ import annotations

import json
import time
import traceback
from typing import Callable, Optional

from agent_tools import OLLAMA_TOOL_SPECS, TOOL_DISPATCH


# ────────────────────────────────────────────────────────────────────────────
# Constants
# ────────────────────────────────────────────────────────────────────────────

# Model name passed as `model=` to ollama.chat() by research_deal().
MODEL_NAME = "PropertyResearcher"
# Default value of research_deal()'s `max_iterations` parameter.
DEFAULT_MAX_ITERATIONS = 15
# NOTE(review): not referenced anywhere in the visible part of this module —
# confirm the per-tool timeout is actually enforced elsewhere.
TOOL_TIMEOUT_SECONDS = 60  # cap per tool call


# ────────────────────────────────────────────────────────────────────────────
# Main entry point
# ────────────────────────────────────────────────────────────────────────────

def _merge_unique(tracked: list, reported) -> list:
    """Return ``tracked`` plus entries of ``reported`` not already present.

    ``reported`` comes from model-generated ``finish`` arguments, so it may be
    missing or malformed: a bare string is treated as a one-item list and any
    other non-list/tuple value is ignored. Order is preserved.
    """
    if isinstance(reported, str):
        reported = [reported]
    elif not isinstance(reported, (list, tuple)):
        reported = []
    merged = list(tracked)
    for item in reported:
        if item not in merged:
            merged.append(item)
    return merged


def research_deal(
    deal_id: int,
    max_iterations: int = DEFAULT_MAX_ITERATIONS,
    status_cb: Optional[Callable[[str], None]] = None,
) -> dict:
    """Run the autonomous research loop on a deal.

    Each iteration sends the conversation to ``ollama.chat`` with the tool
    specs, executes every tool call the model requests, and feeds the
    (truncated) JSON results back as ``tool`` messages. The loop ends when the
    model calls ``finish``, replies without tool calls, ``ollama.chat`` raises,
    or ``max_iterations`` is exhausted.

    Args:
        deal_id: Id of the deal to load via ``deals_db.get_deal_by_id``.
        max_iterations: Upper bound on model round-trips. Values <= 0 skip the
            loop entirely and report ``iterations == 0``.
        status_cb: Optional callback receiving human-readable progress lines.

    Returns:
        {
          "deal_id": int,
          "summary": str,
          "portals_used": [str],
          "documents_saved": [str],
          "findings": dict,
          "iterations": int,
          "elapsed_seconds": float,
          "errors": [str],
          "finished_cleanly": bool,
          "tool_calls_log": [dict]   # detailed history per iteration
        }

        ``portals_used`` / ``documents_saved`` contain what was tracked from
        tool results during the run, extended with any additional entries the
        model reports in ``finish``. When ``ollama`` is missing or the deal
        cannot be loaded, the ``_error_result`` shape is returned instead.
    """
    t0 = time.perf_counter()

    def _log(msg: str) -> None:
        if status_cb:
            status_cb(msg)

    try:
        import ollama
    except ImportError:
        return _error_result(deal_id, "ollama package not installed", t0)

    # Load deal context
    try:
        from deals_db import init_db, get_deal_by_id
        init_db()
        deal = get_deal_by_id(deal_id)
        if not deal:
            return _error_result(deal_id, f"deal_id={deal_id} not found in deals.db", t0)
    except Exception as e:
        return _error_result(deal_id, f"deal load failed: {e}", t0)

    # Build the initial user task description
    task = _build_task_description(deal)
    _log(f"[researcher] Task: {task[:120]}")

    # Conversation history (multi-turn with tool results fed back)
    messages: list[dict] = [
        {"role": "user", "content": task},
    ]

    findings: dict = {}
    portals_used: list[str] = []
    documents_saved: list[str] = []
    errors: list[str] = []
    tool_calls_log: list[dict] = []
    finished_cleanly = False
    final_summary = ""

    # Pre-bind so the result below is well-defined when the loop body never
    # runs (max_iterations <= 0).
    iteration = 0
    for iteration in range(1, max_iterations + 1):
        _log(f"[researcher] iter {iteration}/{max_iterations}: thinking...")

        try:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                tools=OLLAMA_TOOL_SPECS,
                options={"temperature": 0.2, "num_ctx": 16384},
            )
        except Exception as e:
            errors.append(f"iter {iteration} ollama.chat failed: {e}")
            _log(f"[researcher]   ERROR: {e}")
            break

        msg = response.get("message", {})
        # Either field may be present but None; normalise before use.
        content = msg.get("content") or ""
        tool_calls = msg.get("tool_calls") or []

        # Append assistant message to history
        messages.append({"role": "assistant", "content": content,
                         "tool_calls": tool_calls})

        if not tool_calls:
            # No more tools — model decided to finish without calling finish()
            content = content.strip()
            _log(f"[researcher] no tool_calls; content={content[:200]}")
            if content:
                final_summary = content
            break

        # Execute each tool call sequentially
        for tc in tool_calls:
            fn = tc.get("function", {})
            name = fn.get("name", "")
            args_raw = fn.get("arguments", {})
            args = args_raw if isinstance(args_raw, dict) else _safe_json_loads(args_raw)
            if not isinstance(args, dict):
                # Valid JSON that is not an object (list, string, number).
                args = {}

            _log(f"[researcher]   → calling {name}({_short_args(args)})")

            # Special-case: finish() ends the loop
            if name == "finish":
                final_summary = str(args.get("summary") or "")
                # Keep what was actually tracked from tool results; the model's
                # own lists only add to it (it may omit or shorten them).
                portals_used = _merge_unique(portals_used, args.get("portals_used"))
                documents_saved = _merge_unique(documents_saved, args.get("documents_saved"))
                findings = args.get("findings") or {}
                finished_cleanly = True
                tool_calls_log.append({"iteration": iteration, "name": name, "args": args})
                _log(f"[researcher]   FINISH: {final_summary[:120]}")
                break

            # Auto-inject deal_id for save/download tools if missing
            if name in ("save_document", "download_pdf") and "deal_id" not in args:
                args["deal_id"] = deal_id

            # Execute the tool
            tool_result = _execute_tool(name, args)
            if not isinstance(tool_result, dict):
                # The bookkeeping below assumes a dict; wrap anything else.
                tool_result = {"result": tool_result}
            tool_calls_log.append({
                "iteration": iteration, "name": name,
                "args": _short_args(args, max_len=300),
                "result": _short_result(tool_result),
            })

            # Track outputs of interest
            if name in ("save_document", "download_pdf"):
                if "saved_to" in tool_result:
                    documents_saved.append(tool_result["saved_to"])
            elif name == "remember_portal":
                if tool_result.get("ok"):
                    portals_used.append(tool_result.get("url", ""))

            # Feed tool result back to the model (capped at 3000 chars)
            tool_result_str = json.dumps(tool_result, default=str)[:3000]
            messages.append({"role": "tool", "content": tool_result_str, "name": name})

        if finished_cleanly:
            break

    elapsed = time.perf_counter() - t0
    return {
        "deal_id": deal_id,
        "summary": final_summary or "Research completed without explicit summary",
        "portals_used": portals_used,
        "documents_saved": documents_saved,
        "findings": findings,
        "iterations": iteration,
        "elapsed_seconds": round(elapsed, 1),
        "errors": errors,
        "finished_cleanly": finished_cleanly,
        "tool_calls_log": tool_calls_log,
    }


# ────────────────────────────────────────────────────────────────────────────
# Helpers
# ────────────────────────────────────────────────────────────────────────────

def _build_task_description(deal: dict) -> str:
    """Build the initial user prompt describing what to research.

    Args:
        deal: Deal record. ``id`` is required; ``address``, ``county``,
            ``state`` and ``deal_type`` are shown as ``?`` when missing or
            empty; ``case_number``, ``parcel_id`` and ``listing_price`` add a
            line only when truthy.

    Returns:
        A newline-joined prompt: the deal facts, the research goal, and the
        instructions to save documents and call ``finish()``.

    Raises:
        KeyError: If ``deal`` has no ``id`` key.
    """
    parts = [
        "Research this property deal:",
        f"- deal_id: {deal['id']}",
        f"- Address: {deal.get('address') or '?'}",
        f"- County: {deal.get('county') or '?'}",
        f"- State: {deal.get('state') or '?'}",
        f"- Deal type: {deal.get('deal_type') or '?'}",
    ]
    if deal.get("case_number"):
        parts.append(f"- Case #: {deal['case_number']}")
    if deal.get("parcel_id"):
        parts.append(f"- Parcel ID: {deal['parcel_id']}")

    price = deal.get("listing_price")
    if price:
        try:
            amount = f"${price:,.0f}"
        except (TypeError, ValueError):
            # Price is not a number (e.g. a string): parse it if possible,
            # otherwise show it verbatim rather than crash.
            cleaned = str(price).replace("$", "").replace(",", "").strip()
            try:
                amount = f"${float(cleaned):,.0f}"
            except ValueError:
                amount = str(price)
        parts.append(f"- Listing price: {amount}")

    parts.append("")
    parts.append("Goal: find and document the 3 BASIC items for pre-screening:")
    parts.append("  1. Property Appraiser data (owner, assessed value, year built, sqft)")
    parts.append("  2. Court records (plaintiff/defendant if foreclosure, or owner if MLS)")
    parts.append("  3. Property photo (if available from PA or other free source)")
    parts.append("")
    parts.append("Save what you find via save_document. Call remember_portal for working portals.")
    parts.append("When done, call finish() with a summary.")
    return "\n".join(parts)


def _execute_tool(name: str, args: dict) -> dict:
    """Dispatch a model-requested tool call to its Python implementation.

    Args:
        name: Tool name, looked up in ``TOOL_DISPATCH``.
        args: Keyword arguments supplied by the model.

    Returns:
        The tool's result dict. A non-dict return value is wrapped as
        ``{"result": value}``. Failures are reported as ``{"error": ...}``
        (plus a truncated ``"trace"`` when the tool itself raised) instead of
        propagating, so the caller can feed them back to the model.
    """
    import inspect

    fn = TOOL_DISPATCH.get(name)
    if not fn:
        return {"error": f"unknown tool: {name}"}
    if not isinstance(args, dict):
        return {"error": f"bad args for {name}: expected an object, "
                         f"got {type(args).__name__}"}

    # Filter args to those the function accepts (defensive): the model may
    # invent extra keyword arguments. Skipped when the signature cannot be
    # inspected or the function takes **kwargs.
    call_args = args
    try:
        params = list(inspect.signature(fn).parameters.values())
    except (TypeError, ValueError):
        params = None
    if params is not None and not any(p.kind is p.VAR_KEYWORD for p in params):
        accepted = {
            p.name for p in params
            if p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)
        }
        call_args = {k: v for k, v in args.items() if k in accepted}

    try:
        result = fn(**call_args)
    except TypeError as e:
        return {"error": f"bad args for {name}: {e}"}
    except Exception as e:
        return {"error": f"{name} crashed: {type(e).__name__}: {e}",
                "trace": traceback.format_exc()[:500]}
    # Honour the declared dict return type even if a tool returns otherwise.
    return result if isinstance(result, dict) else {"result": result}


def _safe_json_loads(s) -> dict:
    """Parse ``s`` as a JSON object, returning ``{}`` on any failure.

    Args:
        s: A JSON document (``str``/``bytes``), an already-parsed dict, or a
            falsy value.

    Returns:
        ``s`` itself when it is a non-empty dict; the decoded object when
        ``s`` is valid JSON whose top level is an object; ``{}`` otherwise
        (falsy input, invalid JSON, unsupported type, or valid JSON that is
        a list, string, number, etc.).
    """
    if not s:
        return {}
    if isinstance(s, dict):
        return s
    try:
        parsed = json.loads(s)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and bad byte encodings;
        # TypeError covers inputs that are not str/bytes.
        return {}
    # Callers use .get() on the result, so only a JSON object is acceptable.
    return parsed if isinstance(parsed, dict) else {}


def _short_args(args: dict, max_len: int = 100) -> str:
    """Render ``args`` as JSON, cut to ``max_len`` characters.

    Values that are not JSON-serialisable are stringified. ``"..."`` is
    appended whenever the full rendering is longer than ``max_len``.
    """
    encoded = json.dumps(args, default=str)
    if len(encoded) <= max_len:
        return encoded[:max_len]
    return encoded[:max_len] + "..."


def _short_result(result: dict) -> dict:
    """Return a copy of ``result`` with oversized values clipped for logging.

    Strings longer than 200 characters keep their first 200 plus a
    ``...(N chars)`` suffix; lists longer than 5 items keep their first 5
    plus a ``...(N items)`` marker. Everything else is passed through as-is.
    """
    def _clip(value):
        if isinstance(value, str):
            if len(value) > 200:
                return value[:200] + f"...({len(value)} chars)"
            return value
        if isinstance(value, list):
            if len(value) > 5:
                return value[:5] + [f"...({len(value)} items)"]
            return value
        return value

    return {key: _clip(value) for key, value in result.items()}


def _error_result(deal_id: int, msg: str, t0: float) -> dict:
    """Build the result dict for a run that failed before the loop started.

    The dict has the same keys as a normal ``research_deal`` result, with
    empty collections, ``iterations`` of 0, ``finished_cleanly`` False and
    ``msg`` as the only entry in ``errors``. ``elapsed_seconds`` is the time
    since ``t0`` (a ``time.perf_counter()`` reading), rounded to 0.1 s.
    """
    seconds_spent = round(time.perf_counter() - t0, 1)
    return dict(
        deal_id=deal_id,
        summary="",
        portals_used=[],
        documents_saved=[],
        findings={},
        iterations=0,
        elapsed_seconds=seconds_spent,
        errors=[msg],
        finished_cleanly=False,
        tool_calls_log=[],
    )