feat: AR-House initial commit

2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
@@ -0,0 +1,277 @@
+"""property_researcher.py — Reasoning loop that autonomously researches property deals.
+
+PRINCIPLE:
+The local model (Ollama PropertyResearcher) decides which tools to call to
+investigate a deal. The loop runs until the model calls `finish`, replies
+without any tool call, or the maximum number of iterations is reached.
+
+USAGE:
+    from property_researcher import research_deal
+    result = research_deal(
+        deal_id=349,
+        max_iterations=15,
+        status_cb=print,
+    )
+    # → {deal_id, summary, portals_used, documents_saved, findings, iterations,
+    #    elapsed_seconds, errors, finished_cleanly, tool_calls_log}
+"""
+from __future__ import annotations
+
+import json
+import time
+import traceback
+from typing import Callable, Optional
+
+from agent_tools import OLLAMA_TOOL_SPECS, TOOL_DISPATCH
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Constants
+# ────────────────────────────────────────────────────────────────────────────
+
+# Name of the Ollama model passed to ollama.chat() by research_deal().
+MODEL_NAME = "PropertyResearcher"
+# Default cap on reasoning-loop iterations when the caller passes no max_iterations.
+DEFAULT_MAX_ITERATIONS = 15
+# Intended per-tool-call time cap, in seconds.
+# NOTE(review): not referenced anywhere in the visible code, so no timeout
+# appears to be enforced on tool calls — confirm against the rest of the project.
+TOOL_TIMEOUT_SECONDS = 60  # cap per tool call
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Main entry point
+# ────────────────────────────────────────────────────────────────────────────
+
+def research_deal(
+    deal_id: int,
+    max_iterations: int = DEFAULT_MAX_ITERATIONS,
+    status_cb: Optional[Callable[[str], None]] = None,
+) -> dict:
+    """Run the autonomous research loop on a deal.
+
+    The model named by ``MODEL_NAME`` is called through ``ollama.chat`` with
+    ``OLLAMA_TOOL_SPECS``. Each tool call it requests is run through
+    ``_execute_tool`` and the JSON-encoded result (cut to 3000 characters) is
+    appended to the conversation. The loop stops when the model calls
+    ``finish``, replies without any tool call, ``ollama.chat`` raises, or
+    ``max_iterations`` rounds have run.
+
+    Args:
+        deal_id: Id of the deal, looked up with ``deals_db.get_deal_by_id``.
+        max_iterations: Upper bound on model round-trips. Zero or a negative
+            value runs no round and reports ``"iterations": 0``.
+        status_cb: Optional callable that receives progress messages.
+
+    Returns:
+        {
+          "deal_id": int,
+          "summary": str,
+          "portals_used": [str],
+          "documents_saved": [str],
+          "findings": dict,
+          "iterations": int,
+          "elapsed_seconds": float,
+          "errors": [str],
+          "finished_cleanly": bool,
+          "tool_calls_log": [dict]   # detailed history per iteration
+        }
+        A missing ``ollama`` package, an unknown deal or a failing deal load
+        is reported through "errors" (built by ``_error_result``) instead of
+        being raised.
+    """
+    t0 = time.perf_counter()
+
+    def _log(msg: str) -> None:
+        if status_cb:
+            status_cb(msg)
+
+    def _merge_unique(tracked: list, reported) -> list:
+        """Return ``tracked`` followed by the items of ``reported`` not already present."""
+        merged = list(tracked)
+        # `reported` comes straight from the model; ignore anything that is not a list.
+        for item in reported if isinstance(reported, list) else []:
+            if item not in merged:
+                merged.append(item)
+        return merged
+
+    try:
+        import ollama
+    except ImportError:
+        return _error_result(deal_id, "ollama package not installed", t0)
+
+    # Load deal context
+    try:
+        from deals_db import init_db, get_deal_by_id
+        init_db()
+        deal = get_deal_by_id(deal_id)
+        if not deal:
+            return _error_result(deal_id, f"deal_id={deal_id} not found in deals.db", t0)
+    except Exception as e:
+        return _error_result(deal_id, f"deal load failed: {e}", t0)
+
+    # Build the initial user task description
+    task = _build_task_description(deal)
+    _log(f"[researcher] Task: {task[:120]}")
+
+    # Conversation history (multi-turn with tool results fed back)
+    messages: list[dict] = [
+        {"role": "user", "content": task},
+    ]
+
+    findings: dict = {}
+    portals_used: list[str] = []
+    documents_saved: list[str] = []
+    errors: list[str] = []
+    tool_calls_log: list[dict] = []
+    finished_cleanly = False
+    final_summary = ""
+    # Tracked separately from the loop variable so the return value is defined
+    # even when max_iterations <= 0 and the loop body never runs.
+    iterations_run = 0
+
+    for iteration in range(1, max_iterations + 1):
+        iterations_run = iteration
+        _log(f"[researcher] iter {iteration}/{max_iterations}: thinking...")
+
+        try:
+            response = ollama.chat(
+                model=MODEL_NAME,
+                messages=messages,
+                tools=OLLAMA_TOOL_SPECS,
+                options={"temperature": 0.2, "num_ctx": 16384},
+            )
+        except Exception as e:
+            errors.append(f"iter {iteration} ollama.chat failed: {e}")
+            _log(f"[researcher]   ERROR: {e}")
+            break
+
+        msg = response.get("message", {})
+        # Append assistant message to history
+        messages.append({"role": "assistant", "content": msg.get("content", ""),
+                         "tool_calls": msg.get("tool_calls", [])})
+
+        tool_calls = msg.get("tool_calls", []) or []
+        if not tool_calls:
+            # No more tools — model decided to finish without calling finish().
+            # The content field may be present but None, so normalise before strip().
+            content = (msg.get("content") or "").strip()
+            _log(f"[researcher] no tool_calls; content={content[:200]}")
+            if content:
+                final_summary = content
+            break
+
+        # Execute each tool call sequentially
+        for tc in tool_calls:
+            fn = tc.get("function", {})
+            name = fn.get("name", "")
+            args_raw = fn.get("arguments", {})
+            args = args_raw if isinstance(args_raw, dict) else _safe_json_loads(args_raw)
+            if not isinstance(args, dict):
+                # Valid JSON that is not an object (list, number, ...) cannot
+                # be used as keyword arguments.
+                args = {}
+
+            _log(f"[researcher]   → calling {name}({_short_args(args)})")
+
+            # Special-case: finish() ends the loop
+            if name == "finish":
+                final_summary = str(args.get("summary") or "")
+                # Keep what was tracked from real tool results and add what the
+                # model reports, instead of replacing the tracked lists.
+                portals_used = _merge_unique(portals_used, args.get("portals_used"))
+                documents_saved = _merge_unique(documents_saved, args.get("documents_saved"))
+                findings = args.get("findings") or {}
+                finished_cleanly = True
+                tool_calls_log.append({"iteration": iteration, "name": name, "args": args})
+                _log(f"[researcher]   FINISH: {final_summary[:120]}")
+                break
+
+            # Auto-inject deal_id for save/download tools if missing
+            if name in ("save_document", "download_pdf") and "deal_id" not in args:
+                args["deal_id"] = deal_id
+
+            # Execute the tool
+            tool_result = _execute_tool(name, args)
+            if not isinstance(tool_result, dict):
+                # A tool returning None/str/list would break the membership
+                # and .get() checks below; wrap it so the loop keeps going.
+                tool_result = {"result": tool_result}
+            tool_calls_log.append({
+                "iteration": iteration, "name": name,
+                "args": _short_args(args, max_len=300),
+                "result": _short_result(tool_result),
+            })
+
+            # Track outputs of interest
+            if name in ("save_document", "download_pdf"):
+                if "saved_to" in tool_result:
+                    documents_saved.append(tool_result["saved_to"])
+            elif name == "remember_portal":
+                if tool_result.get("ok"):
+                    portals_used.append(tool_result.get("url", ""))
+
+            # Feed tool result back to the model (truncated to bound context use)
+            tool_result_str = json.dumps(tool_result, default=str)[:3000]
+            messages.append({"role": "tool", "content": tool_result_str, "name": name})
+
+        if finished_cleanly:
+            break
+
+    elapsed = time.perf_counter() - t0
+    return {
+        "deal_id": deal_id,
+        "summary": final_summary or "Research completed without explicit summary",
+        "portals_used": portals_used,
+        "documents_saved": documents_saved,
+        "findings": findings,
+        "iterations": iterations_run,
+        "elapsed_seconds": round(elapsed, 1),
+        "errors": errors,
+        "finished_cleanly": finished_cleanly,
+        "tool_calls_log": tool_calls_log,
+    }
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ────────────────────────────────────────────────────────────────────────────
+
+def _build_task_description(deal: dict) -> str:
+    """Build the initial user prompt describing what to research.
+
+    Args:
+        deal: Deal record. ``deal["id"]`` is required. ``address``,
+            ``county``, ``state`` and ``deal_type`` are rendered as "?" when
+            missing or empty. ``case_number``, ``parcel_id`` and
+            ``listing_price`` add a line only when present and truthy.
+
+    Returns:
+        The prompt text, one item per line.
+
+    Raises:
+        KeyError: If ``deal`` has no "id" key.
+    """
+    parts = [
+        "Research this property deal:",
+        f"- deal_id: {deal['id']}",
+        f"- Address: {deal.get('address') or '?'}",
+        f"- County: {deal.get('county') or '?'}",
+        f"- State: {deal.get('state') or '?'}",
+        f"- Deal type: {deal.get('deal_type') or '?'}",
+    ]
+    if deal.get("case_number"):
+        parts.append(f"- Case #: {deal['case_number']}")
+    if deal.get("parcel_id"):
+        parts.append(f"- Parcel ID: {deal['parcel_id']}")
+
+    price = deal.get("listing_price")
+    if price:
+        try:
+            parts.append(f"- Listing price: ${float(price):,.0f}")
+        except (TypeError, ValueError):
+            # A non-numeric price (e.g. free text) is shown verbatim rather
+            # than raising while the prompt is being built.
+            parts.append(f"- Listing price: {price}")
+
+    parts.extend([
+        "",
+        "Goal: find and document the 3 BASIC items for pre-screening:",
+        "  1. Property Appraiser data (owner, assessed value, year built, sqft)",
+        "  2. Court records (plaintiff/defendant if foreclosure, or owner if MLS)",
+        "  3. Property photo (if available from PA or other free source)",
+        "",
+        "Save what you find via save_document. Call remember_portal for working portals.",
+        "When done, call finish() with a summary.",
+    ])
+    return "\n".join(parts)
+
+
+def _execute_tool(name: str, args: dict) -> dict:
+    """Dispatch a tool call to the Python function registered in ``TOOL_DISPATCH``.
+
+    Keyword arguments the target function does not declare are dropped before
+    the call (unless it accepts ``**kwargs``), so a single unexpected argument
+    from the model does not fail the whole call. Arguments that still do not
+    bind (e.g. a missing required one) are reported as "bad args" without
+    calling the tool; an exception raised by the tool itself is reported as a
+    crash.
+
+    Args:
+        name: Tool name requested by the model.
+        args: Keyword arguments for the tool; a non-dict is treated as empty.
+
+    Returns:
+        The tool's result dict; a non-dict return value is wrapped as
+        ``{"result": value}``. On failure, ``{"error": ...}`` (plus a
+        truncated "trace" when the tool raised).
+    """
+    import inspect  # local import keeps this helper self-contained
+
+    fn = TOOL_DISPATCH.get(name)
+    if not fn:
+        return {"error": f"unknown tool: {name}"}
+
+    call_args = dict(args) if isinstance(args, dict) else {}
+
+    try:
+        sig = inspect.signature(fn)
+    except (TypeError, ValueError):
+        # Not introspectable (some builtins/C callables): pass args unchanged.
+        sig = None
+
+    if sig is not None:
+        params = list(sig.parameters.values())
+        takes_var_kw = any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params)
+        if not takes_var_kw:
+            # Filter args to those the function accepts (defensive)
+            accepted = {
+                p.name for p in params
+                if p.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                              inspect.Parameter.KEYWORD_ONLY)
+            }
+            call_args = {k: v for k, v in call_args.items() if k in accepted}
+        try:
+            # Check binding up front so a TypeError raised *inside* the tool
+            # is not mistaken for an argument problem.
+            sig.bind(**call_args)
+        except TypeError as e:
+            return {"error": f"bad args for {name}: {e}"}
+
+    try:
+        result = fn(**call_args)
+    except Exception as e:
+        return {"error": f"{name} crashed: {type(e).__name__}: {e}",
+                "trace": traceback.format_exc()[:500]}
+    return result if isinstance(result, dict) else {"result": result}
+
+
+def _safe_json_loads(s) -> dict:
+    """Parse ``s`` as a JSON object, returning ``{}`` whenever that is not possible.
+
+    Args:
+        s: A dict (returned unchanged), a JSON document as str/bytes, or any
+            falsy value.
+
+    Returns:
+        The decoded dict. ``{}`` for falsy input, undecodable input, or valid
+        JSON whose top-level value is not an object (list, number, string...),
+        so callers can always use the result as keyword arguments.
+    """
+    if not s:
+        return {}
+    if isinstance(s, dict):
+        return s
+    try:
+        parsed = json.loads(s)
+    except (TypeError, ValueError, RecursionError):
+        # TypeError: unsupported input type; ValueError: malformed JSON or
+        # undecodable bytes; RecursionError: pathologically deep nesting.
+        return {}
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def _short_args(args: dict, max_len: int = 100) -> str:
+    """Return ``args`` as JSON text, cut to ``max_len`` characters.
+
+    Values that JSON cannot encode natively are stringified. When the encoded
+    text is longer than ``max_len`` it is cut and "..." is appended.
+    """
+    encoded = json.dumps(args, default=str)
+    if len(encoded) <= max_len:
+        return encoded
+    return encoded[:max_len] + "..."
+
+
+def _short_result(result: dict) -> dict:
+    """Return a copy of ``result`` with oversized top-level values shortened.
+
+    Strings longer than 200 characters keep their first 200 characters plus a
+    note with the original length; lists longer than 5 items keep their first
+    5 items plus a note with the original item count. Every other value is
+    carried over unchanged.
+    """
+    def _clip(value):
+        if isinstance(value, str) and len(value) > 200:
+            return value[:200] + f"...({len(value)} chars)"
+        if isinstance(value, list) and len(value) > 5:
+            return value[:5] + [f"...({len(value)} items)"]
+        return value
+
+    return {key: _clip(value) for key, value in result.items()}
+
+
+def _error_result(deal_id: int, msg: str, t0: float) -> dict:
+    """Build the result dict for a run that failed before the loop started.
+
+    The dict has the same keys as a normal ``research_deal`` result: empty
+    collections, zero iterations, ``msg`` as the only entry of "errors", and
+    the time elapsed since ``t0`` (a ``time.perf_counter()`` reading) rounded
+    to one decimal.
+    """
+    result: dict = {"deal_id": deal_id, "summary": ""}
+    result.update(
+        portals_used=[],
+        documents_saved=[],
+        findings={},
+        iterations=0,
+        elapsed_seconds=round(time.perf_counter() - t0, 1),
+        errors=[msg],
+        finished_cleanly=False,
+        tool_calls_log=[],
+    )
+    return result