Files
AR-House/properties_store.py
2026-07-03 12:24:58 -04:00

267 lines
11 KiB
Python

"""properties_store.py — File-system store para Due Diligence documents.
Cada deal scrapeado (especialmente distressed: foreclosure, tax_deed, reo, auction)
necesita una carpeta dedicada para acumular documentos del DD:
properties/{state}/{county_slug}/{deal_type}/{address_or_case_slug}/
analysis_report.json — snapshot del último análisis multi-agente
briefing_ejecutivo.md — Markdown del briefing del ContextualGlossaryAgent
notes.md — Notas libres del usuario (free-form)
deeds/ — Escrituras, deed transfers
liens/ — Lien inventories, IRS NFTLs, code violations
court_records/ — Lis pendens, civil suits, bankruptcy
property_appraiser/ — Tax assessment, exemptions, sketches
photos/ — Photos descargadas (Zillow CDN, PA sketches, etc.)
due_diligence/ — Reporte DD final del DueDiligenceAgent
offers/ — Letters of intent, contratos draft, contraoffers
.meta.json — Metadata: deal_id, created_at, last_dd_run_at
API publica:
get_property_folder(deal) → Path — devuelve ruta (no crea)
ensure_property_folder(deal) → Path — crea estructura completa
write_meta(folder, **updates) → None — merges the given fields into .meta.json
list_property_folders() → list[dict] — para UI Mis Propiedades
save_json(folder / "X.json", data) → None — helper para persist
save_markdown(folder / "X.md", text) → None — helper para persist
"""
from __future__ import annotations
import json
import re
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
# ────────────────────────────────────────────────────────────────────────────
# Paths
# ────────────────────────────────────────────────────────────────────────────
# Directory that contains this module; the store is rooted beneath it.
PROJECT_ROOT = Path(__file__).resolve().parent
# Top of the per-deal folder tree.
PROPERTIES_ROOT = PROJECT_ROOT.joinpath("properties")
# Sub-directories created inside every deal folder.
SUBFOLDERS = (
    "deeds",               # deeds and deed transfers
    "liens",               # lien inventories, IRS NFTLs, code violations
    "court_records",       # lis pendens, civil suits, bankruptcy
    "property_appraiser",  # tax assessment, exemptions, sketches
    "photos",              # downloaded photos
    "due_diligence",       # final due-diligence report
    "offers",              # letters of intent, draft contracts, counteroffers
)
# ────────────────────────────────────────────────────────────────────────────
# Slug helpers
# ────────────────────────────────────────────────────────────────────────────
def _slug(s: str, max_len: int = 80) -> str:
    """Sanitize a string so it can be used as a single path component.

    Every run of characters outside ``[A-Za-z0-9-_.]`` becomes one
    underscore, repeated underscores are collapsed, and leading/trailing
    ``_``, ``-`` and ``.`` are removed. The result is capped at *max_len*
    characters.

    Returns ``"unknown"`` when *s* is empty or nothing usable remains.
    """
    if not s:
        return "unknown"
    cleaned = re.sub(r"[^A-Za-z0-9\-_.]+", "_", s.strip())
    cleaned = re.sub(r"_+", "_", cleaned).strip("_-.")
    # Trim again after truncating: the cut can land right after a separator,
    # which would otherwise leave a dangling "_", "-" or "." on the name.
    return cleaned[:max_len].rstrip("_-.") or "unknown"
def _state_slug(state: Optional[str]) -> str:
    """Return *state* as a two-letter uppercase code.

    The value is stripped and uppercased; anything that is missing or is not
    exactly two letters A-Z falls back to ``"FL"``.
    """
    code = (state or "FL").strip().upper()
    if re.match(r"^[A-Z]{2}$", code):
        return code
    return "FL"
def _county_slug(county: Optional[str]) -> str:
    """Return a filesystem-safe folder name for *county*.

    A missing value becomes ``"Unknown"``. The literal text ``" County"`` is
    removed, hyphens and spaces become underscores, and the result is passed
    through ``_slug``.
    """
    name = (county or "Unknown").strip()
    name = name.replace(" County", "").strip()
    # One pass that maps both "-" and " " to "_".
    underscored = name.translate(str.maketrans("- ", "__"))
    return _slug(underscored)
def _deal_type_slug(deal_type: Optional[str]) -> str:
    """Return the canonical deal-type folder name, or ``"unknown"``.

    The value is stripped and lowercased; only the whitelisted canonical
    values are accepted, everything else (including a missing value) maps to
    ``"unknown"``.
    """
    normalized = (deal_type or "unknown").strip().lower()
    if normalized not in {"foreclosure", "tax_deed", "auction", "reo", "mls", "off_market"}:
        return "unknown"
    return normalized
def _identifier_slug(deal: dict) -> str:
    """Pick the best identifier for the deal's folder name.

    Priority: case_number > parcel_id > address > deal_hash > id.
    The first non-empty candidate wins and is returned as
    ``"<prefix>_<slug>"`` with prefix ``case``, ``parcel``, ``addr``,
    ``hash`` or ``id``. Addresses are uppercased before slugging.

    Every candidate, including the final ``id`` fallback, goes through
    ``_slug`` so the result is always a single safe path component.
    Non-string values (e.g. an integer parcel id) are converted with
    ``str()`` instead of raising.
    """
    candidates = (
        ("case", deal.get("case_number")),
        ("parcel", deal.get("parcel_id")),
        ("addr", deal.get("address")),
        ("hash", deal.get("deal_hash")),
    )
    for prefix, raw in candidates:
        text = str(raw or "").strip()
        if not text:
            continue
        if prefix == "addr":
            # Uppercase so differently-cased spellings share one folder.
            text = text.upper()
        return f"{prefix}_{_slug(text)}"
    # Last resort: the deal id, sanitized like every other candidate so it
    # cannot carry path separators into the folder name.
    return f"id_{_slug(str(deal.get('id', 'unknown')))}"
# ────────────────────────────────────────────────────────────────────────────
# Folder API
# ────────────────────────────────────────────────────────────────────────────
def get_property_folder(deal: dict) -> Path:
    """Return the folder path for *deal* without creating anything.

    Layout: properties/{STATE}/{County_Slug}/{deal_type}/{identifier_slug}/
    """
    parts = (
        _state_slug(deal.get("state")),
        _county_slug(deal.get("county")),
        _deal_type_slug(deal.get("deal_type")),
        _identifier_slug(deal),
    )
    return PROPERTIES_ROOT.joinpath(*parts)
def ensure_property_folder(deal: dict) -> Path:
    """Create the deal's folder tree and seed its starter files.

    Creates the deal folder and every entry of ``SUBFOLDERS``, then writes
    ``.meta.json`` and ``notes.md`` only if they do not exist yet. Calling
    it again on an existing folder leaves existing files untouched.

    Returns the deal folder path.
    """
    root = get_property_folder(deal)
    root.mkdir(parents=True, exist_ok=True)
    for name in SUBFOLDERS:
        root.joinpath(name).mkdir(exist_ok=True)

    # Seed the metadata file once; later changes go through write_meta.
    meta_file = root / ".meta.json"
    if not meta_file.exists():
        seed = {"deal_id": deal.get("id")}
        for key in (
            "deal_hash",
            "source",
            "address",
            "county",
            "state",
            "deal_type",
            "case_number",
            "parcel_id",
        ):
            seed[key] = deal.get(key)
        seed["created_at"] = datetime.now(timezone.utc).isoformat()
        seed["last_dd_run_at"] = None
        seed["last_dd_kind"] = None  # "pre_dd" | "full_dd"
        meta_file.write_text(
            json.dumps(seed, indent=2, ensure_ascii=False), encoding="utf-8"
        )

    # Seed a notes file so the user always has a place to write.
    notes_file = root / "notes.md"
    if not notes_file.exists():
        heading = deal.get('address') or '(sin direccion)'
        stamp = datetime.now(timezone.utc).isoformat()
        notes_file.write_text(
            f"# Notas — {heading}\n\n"
            f"Carpeta creada: {stamp}\n\n"
            f"## Escribí acá lo que quieras recordar sobre este deal.\n\n",
            encoding="utf-8",
        )
    return root
def write_meta(folder: Path, **updates) -> None:
    """Merge *updates* into ``folder/.meta.json``, keeping existing fields.

    The current file content is used as the base when it parses to a JSON
    object. A missing, unreadable or corrupt file, or one holding valid JSON
    that is not an object, is treated as empty (best effort) rather than
    raising. ``last_updated_at`` is always set to the current UTC time.

    Args:
        folder: Deal folder that holds ``.meta.json``; it must already exist.
        **updates: Fields to add or overwrite.
    """
    meta_path = folder / ".meta.json"
    try:
        loaded = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # invalid JSON and undecodable bytes.
        loaded = None
    # Only a JSON object can be merged into; anything else starts fresh.
    data = loaded if isinstance(loaded, dict) else {}
    data.update(updates)
    data["last_updated_at"] = datetime.now(timezone.utc).isoformat()
    meta_path.write_text(
        json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
    )
def save_json(path: Path, data) -> None:
    """Write *data* to *path* as indented UTF-8 JSON.

    Parent directories are created when missing. Values the JSON encoder
    cannot handle natively are converted with ``str``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize before opening so a failed dump never truncates the file.
    payload = json.dumps(data, indent=2, ensure_ascii=False, default=str)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(payload)
def save_markdown(path: Path, text: str) -> None:
    """Write *text* to *path* as UTF-8, creating parent directories first."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")
# ────────────────────────────────────────────────────────────────────────────
# Listing / discovery
# ────────────────────────────────────────────────────────────────────────────
def _load_meta_dict(meta_path: Path) -> dict:
    """Return the parsed ``.meta.json`` object, or ``{}`` if it is unusable."""
    try:
        loaded = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, undecodable bytes or invalid JSON.
        return {}
    # Callers use ``meta.get(...)``, so anything but a JSON object is dropped.
    return loaded if isinstance(loaded, dict) else {}


def _child_dirs(parent: Path) -> list:
    """Return the immediate sub-directories of *parent*."""
    return [entry for entry in parent.iterdir() if entry.is_dir()]


def list_property_folders() -> list[dict]:
    """List every existing property folder, newest first.

    Walks ``properties/STATE/COUNTY/DEAL_TYPE/IDENT/`` and returns one dict
    per IDENT directory with the keys ``state``, ``county``, ``deal_type``,
    ``identifier``, ``folder_path`` (str), ``meta`` (parsed ``.meta.json``,
    ``{}`` when missing or invalid), ``file_count`` (files at any depth) and
    ``last_modified_at`` (ISO-8601 UTC timestamp of the most recently
    modified file, or of the directory itself when it holds no files).

    Returns an empty list when the properties root does not exist.
    """
    out: list[dict] = []
    if not PROPERTIES_ROOT.is_dir():
        return out
    for state_dir in _child_dirs(PROPERTIES_ROOT):
        for county_dir in _child_dirs(state_dir):
            for dtype_dir in _child_dirs(county_dir):
                for ident_dir in _child_dirs(dtype_dir):
                    # One recursive walk feeds both the file count and the
                    # newest-modification lookup.
                    mtimes = [
                        path.stat().st_mtime
                        for path in ident_dir.rglob("*")
                        if path.is_file()
                    ]
                    last_mod = max(mtimes, default=ident_dir.stat().st_mtime)
                    out.append({
                        "state": state_dir.name,
                        "county": county_dir.name,
                        "deal_type": dtype_dir.name,
                        "identifier": ident_dir.name,
                        "folder_path": str(ident_dir),
                        "meta": _load_meta_dict(ident_dir / ".meta.json"),
                        "file_count": len(mtimes),
                        "last_modified_at": datetime.fromtimestamp(
                            last_mod, timezone.utc
                        ).isoformat(),
                    })
    # Sort by last modified, newest first.
    out.sort(key=lambda entry: entry["last_modified_at"], reverse=True)
    return out
def find_property_folder_by_deal_id(deal_id: int) -> Optional[Path]:
    """Return the folder whose ``.meta.json`` records *deal_id*, else ``None``.

    Scans the entries from ``list_property_folders()`` in their returned
    order and stops at the first match.
    """
    matches = (
        Path(entry["folder_path"])
        for entry in list_property_folders()
        if entry["meta"].get("deal_id") == deal_id
    )
    return next(matches, None)