"""properties_store.py — File-system store para Due Diligence documents. Cada deal scrapeado (especialmente distressed: foreclosure, tax_deed, reo, auction) necesita una carpeta dedicada para acumular documentos del DD: properties/{state}/{county_slug}/{deal_type}/{address_or_case_slug}/ analysis_report.json — snapshot del último análisis multi-agente briefing_ejecutivo.md — Markdown del briefing del ContextualGlossaryAgent notes.md — Notas libres del usuario (free-form) deeds/ — Escrituras, deed transfers liens/ — Lien inventories, IRS NFTLs, code violations court_records/ — Lis pendens, civil suits, bankruptcy property_appraiser/ — Tax assessment, exemptions, sketches photos/ — Photos descargadas (Zillow CDN, PA sketches, etc.) due_diligence/ — Reporte DD final del DueDiligenceAgent offers/ — Letters of intent, contratos draft, contraoffers .meta.json — Metadata: deal_id, created_at, last_dd_run_at API publica: get_property_folder(deal) → Path — devuelve ruta (no crea) ensure_property_folder(deal) → Path — crea estructura completa write_meta(folder, deal_id, **kwargs) → None — actualiza .meta.json list_property_folders() → list[dict] — para UI Mis Propiedades save_json(folder / "X.json", data) → None — helper para persist save_markdown(folder / "X.md", text) → None — helper para persist """ from __future__ import annotations import json import re import time from datetime import datetime, timezone from pathlib import Path from typing import Optional # ──────────────────────────────────────────────────────────────────────────── # Paths # ──────────────────────────────────────────────────────────────────────────── PROJECT_ROOT = Path(__file__).resolve().parent PROPERTIES_ROOT = PROJECT_ROOT / "properties" SUBFOLDERS = ( "deeds", "liens", "court_records", "property_appraiser", "photos", "due_diligence", "offers", ) # ──────────────────────────────────────────────────────────────────────────── # Slug helpers # ──────────────────────────────────────────────────────────────────────────── def _slug(s: str, max_len: int = 80) -> str: """Sanitize a string for filesystem use. Keeps [A-Za-z0-9-_.] only, collapses runs of underscores, caps length. """ if not s: return "unknown" s = s.strip() # Replace runs of non-allowed chars with underscore s = re.sub(r"[^A-Za-z0-9\-_.]+", "_", s) s = re.sub(r"_+", "_", s).strip("_-.") return (s[:max_len] or "unknown") def _state_slug(state: Optional[str]) -> str: s = (state or "FL").strip().upper() return s if re.match(r"^[A-Z]{2}$", s) else "FL" def _county_slug(county: Optional[str]) -> str: c = (county or "Unknown").strip() c = c.replace(" County", "").strip() return _slug(c.replace("-", "_").replace(" ", "_")) def _deal_type_slug(deal_type: Optional[str]) -> str: dt = (deal_type or "unknown").strip().lower() # Whitelist canon values canon = {"foreclosure", "tax_deed", "auction", "reo", "mls", "off_market"} return dt if dt in canon else "unknown" def _identifier_slug(deal: dict) -> str: """Pick the best identifier for the deal's folder name. Priority: case_number > parcel_id > address > deal_hash > deal_id. """ case_number = (deal.get("case_number") or "").strip() if case_number: return f"case_{_slug(case_number)}" parcel_id = (deal.get("parcel_id") or "").strip() if parcel_id: return f"parcel_{_slug(parcel_id)}" address = (deal.get("address") or "").strip() if address: return f"addr_{_slug(address.upper())}" deal_hash = (deal.get("deal_hash") or "").strip() if deal_hash: return f"hash_{_slug(deal_hash)}" return f"id_{deal.get('id', 'unknown')}" # ──────────────────────────────────────────────────────────────────────────── # Folder API # ──────────────────────────────────────────────────────────────────────────── def get_property_folder(deal: dict) -> Path: """Devuelve la ruta de la carpeta del deal (NO crea). Format: properties/{STATE}/{County_Slug}/{deal_type}/{identifier_slug}/ """ state = _state_slug(deal.get("state")) county = _county_slug(deal.get("county")) dtype = _deal_type_slug(deal.get("deal_type")) ident = _identifier_slug(deal) return PROPERTIES_ROOT / state / county / dtype / ident def ensure_property_folder(deal: dict) -> Path: """Crea la estructura completa (folder + subfolders + .meta.json si no existe). Idempotent: si ya existe, devuelve el path sin tocar nada (except updating last_seen). """ folder = get_property_folder(deal) folder.mkdir(parents=True, exist_ok=True) # Create subfolders for sub in SUBFOLDERS: (folder / sub).mkdir(exist_ok=True) # Initialize .meta.json if not present meta_path = folder / ".meta.json" if not meta_path.exists(): meta = { "deal_id": deal.get("id"), "deal_hash": deal.get("deal_hash"), "source": deal.get("source"), "address": deal.get("address"), "county": deal.get("county"), "state": deal.get("state"), "deal_type": deal.get("deal_type"), "case_number": deal.get("case_number"), "parcel_id": deal.get("parcel_id"), "created_at": datetime.now(timezone.utc).isoformat(), "last_dd_run_at": None, "last_dd_kind": None, # "pre_dd" | "full_dd" } meta_path.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8") # Initialize notes.md if not present (so the user always has a place to write) notes_path = folder / "notes.md" if not notes_path.exists(): notes_path.write_text( f"# Notas — {deal.get('address') or '(sin direccion)'}\n\n" f"Carpeta creada: {datetime.now(timezone.utc).isoformat()}\n\n" f"## Escribí acá lo que quieras recordar sobre este deal.\n\n", encoding="utf-8", ) return folder def write_meta(folder: Path, **updates) -> None: """Merges updates into .meta.json (preserves existing fields).""" meta_path = folder / ".meta.json" if meta_path.exists(): try: data = json.loads(meta_path.read_text(encoding="utf-8")) except Exception: data = {} else: data = {} data.update(updates) data["last_updated_at"] = datetime.now(timezone.utc).isoformat() meta_path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8") def save_json(path: Path, data) -> None: """Serialize data as JSON into path (creates parent dirs if needed).""" path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(data, indent=2, ensure_ascii=False, default=str), encoding="utf-8") def save_markdown(path: Path, text: str) -> None: """Save markdown text into path.""" path.parent.mkdir(parents=True, exist_ok=True) path.write_text(text, encoding="utf-8") # ──────────────────────────────────────────────────────────────────────────── # Listing / discovery # ──────────────────────────────────────────────────────────────────────────── def list_property_folders() -> list[dict]: """Lista todas las carpetas de propiedad existentes. Returns list[dict] con: state, county, deal_type, identifier, folder_path, meta (parsed .meta.json), file_count, last_modified_at. """ out: list[dict] = [] if not PROPERTIES_ROOT.exists(): return out # Walk: properties/STATE/COUNTY/DEAL_TYPE/IDENT/ for state_dir in PROPERTIES_ROOT.iterdir(): if not state_dir.is_dir(): continue for county_dir in state_dir.iterdir(): if not county_dir.is_dir(): continue for dtype_dir in county_dir.iterdir(): if not dtype_dir.is_dir(): continue for ident_dir in dtype_dir.iterdir(): if not ident_dir.is_dir(): continue # Parse .meta.json meta_path = ident_dir / ".meta.json" meta = {} if meta_path.exists(): try: meta = json.loads(meta_path.read_text(encoding="utf-8")) except Exception: meta = {} # Count files (recursive) file_count = sum(1 for _ in ident_dir.rglob("*") if _.is_file()) # Last modified last_mod = max( (p.stat().st_mtime for p in ident_dir.rglob("*") if p.is_file()), default=ident_dir.stat().st_mtime, ) out.append({ "state": state_dir.name, "county": county_dir.name, "deal_type": dtype_dir.name, "identifier": ident_dir.name, "folder_path": str(ident_dir), "meta": meta, "file_count": file_count, "last_modified_at": datetime.fromtimestamp(last_mod, timezone.utc).isoformat(), }) # Sort by last modified, newest first out.sort(key=lambda x: x["last_modified_at"], reverse=True) return out def find_property_folder_by_deal_id(deal_id: int) -> Optional[Path]: """Encuentra la carpeta de una propiedad por deal_id (lee .meta.json).""" for entry in list_property_folders(): if entry["meta"].get("deal_id") == deal_id: return Path(entry["folder_path"]) return None