267 lines
11 KiB
Python
267 lines
11 KiB
Python
"""properties_store.py — File-system store para Due Diligence documents.
|
|
|
|
Cada deal scrapeado (especialmente distressed: foreclosure, tax_deed, reo, auction)
|
|
necesita una carpeta dedicada para acumular documentos del DD:
|
|
|
|
properties/{state}/{county_slug}/{deal_type}/{address_or_case_slug}/
|
|
analysis_report.json — snapshot del último análisis multi-agente
|
|
briefing_ejecutivo.md — Markdown del briefing del ContextualGlossaryAgent
|
|
notes.md — Notas libres del usuario (free-form)
|
|
deeds/ — Escrituras, deed transfers
|
|
liens/ — Lien inventories, IRS NFTLs, code violations
|
|
court_records/ — Lis pendens, civil suits, bankruptcy
|
|
property_appraiser/ — Tax assessment, exemptions, sketches
|
|
photos/ — Photos descargadas (Zillow CDN, PA sketches, etc.)
|
|
due_diligence/ — Reporte DD final del DueDiligenceAgent
|
|
offers/ — Letters of intent, contratos draft, contraoffers
|
|
.meta.json — Metadata: deal_id, created_at, last_dd_run_at
|
|
|
|
API publica:
|
|
get_property_folder(deal) → Path — devuelve ruta (no crea)
|
|
ensure_property_folder(deal) → Path — crea estructura completa
|
|
write_meta(folder, **updates) → None — merges the given fields into .meta.json
|
|
list_property_folders() → list[dict] — para UI Mis Propiedades
|
|
save_json(folder / "X.json", data) → None — helper para persist
|
|
save_markdown(folder / "X.md", text) → None — helper para persist
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# Paths
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
# Directory that contains this module.
PROJECT_ROOT = Path(__file__).resolve().parent
# Every per-deal folder is created somewhere below this directory.
PROPERTIES_ROOT = PROJECT_ROOT.joinpath("properties")

# Sub-directories created inside each deal folder, in creation order.
SUBFOLDERS = (
    "deeds",               # deeds, deed transfers
    "liens",               # lien inventories, IRS NFTLs, code violations
    "court_records",       # lis pendens, civil suits, bankruptcy
    "property_appraiser",  # tax assessment, exemptions, sketches
    "photos",              # downloaded photos
    "due_diligence",       # final due-diligence report
    "offers",              # letters of intent, draft contracts, counteroffers
)
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# Slug helpers
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
def _slug(s: str, max_len: int = 80) -> str:
    """Sanitize a string so it can be used as a single path component.

    Every run of characters outside ``[A-Za-z0-9-_.]`` becomes one underscore,
    consecutive underscores are collapsed, and leading/trailing ``_``, ``-``
    and ``.`` are removed. The result is capped at ``max_len`` characters and
    its tail is stripped again, so truncation never leaves a dangling
    separator (a trailing dot is troublesome for directory names on Windows).

    Args:
        s: Raw text (address, case number, ...). Falsy values are accepted.
        max_len: Maximum length of the returned slug.

    Returns:
        The sanitized slug, or ``"unknown"`` when nothing usable remains.
    """
    if not s:
        return "unknown"
    edge_chars = "_-."
    cleaned = re.sub(r"[^A-Za-z0-9\-_.]+", "_", s.strip())
    cleaned = re.sub(r"_+", "_", cleaned).strip(edge_chars)
    # Strip the tail once more: the cut may land right after a separator.
    cleaned = cleaned[:max_len].rstrip(edge_chars)
    return cleaned or "unknown"
|
|
|
|
|
|
def _state_slug(state: Optional[str]) -> str:
    """Normalize a state value to a two-letter uppercase code.

    The value is stripped and upper-cased; when it is missing or does not
    consist of exactly two letters A-Z afterwards, "FL" is returned.
    """
    candidate = (state or "FL").strip().upper()
    if re.fullmatch(r"[A-Z]{2}", candidate) is None:
        return "FL"
    return candidate
|
|
|
|
|
|
def _county_slug(county: Optional[str]) -> str:
    """Build the county path component.

    Falls back to "Unknown" when no county is given, drops the literal
    " County" text, turns hyphens and spaces into underscores and finally
    sanitizes the result with ``_slug``.
    """
    name = county if county else "Unknown"
    name = name.strip().replace(" County", "").strip()
    for separator in ("-", " "):
        name = name.replace(separator, "_")
    return _slug(name)
|
|
|
|
|
|
def _deal_type_slug(deal_type: Optional[str]) -> str:
    """Map a deal type onto one of the canonical folder names.

    The value is trimmed and lower-cased; anything outside the whitelist
    (including a missing value) becomes "unknown".
    """
    known_types = ("foreclosure", "tax_deed", "auction", "reo", "mls", "off_market")
    normalized = deal_type.strip().lower() if deal_type else "unknown"
    if normalized not in known_types:
        return "unknown"
    return normalized
|
|
|
|
|
|
def _identifier_slug(deal: dict) -> str:
    """Pick the best available identifier for the deal's folder name.

    Priority: case_number > parcel_id > address > deal_hash > id. The first
    non-empty field wins and is returned behind a prefix naming its origin
    (``case_``, ``parcel_``, ``addr_``, ``hash_`` or ``id_``). Addresses are
    upper-cased before being slugged.

    Values are coerced to ``str`` before trimming, so numeric identifiers
    (e.g. an integer parcel id) do not fail on ``.strip()``. The ``id``
    fallback is sanitized like every other field because it ends up in a
    filesystem path; a missing or ``None`` id yields ``"id_unknown"``.

    Args:
        deal: Deal record; only the keys listed above are read.

    Returns:
        The folder name for the deal (a single path component).
    """
    candidates = (
        ("case_number", "case"),
        ("parcel_id", "parcel"),
        ("address", "addr"),
        ("deal_hash", "hash"),
    )
    for key, prefix in candidates:
        # Falsy values (None, "", 0) count as "not provided".
        value = str(deal.get(key) or "").strip()
        if not value:
            continue
        if key == "address":
            value = value.upper()
        return f"{prefix}_{_slug(value)}"

    deal_id = deal.get("id")
    if deal_id is None:
        return "id_unknown"
    return f"id_{_slug(str(deal_id))}"
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# Folder API
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
def get_property_folder(deal: dict) -> Path:
    """Return the folder path for a deal without creating anything on disk.

    Layout: properties/{STATE}/{County_Slug}/{deal_type}/{identifier_slug}/
    """
    return PROPERTIES_ROOT.joinpath(
        _state_slug(deal.get("state")),
        _county_slug(deal.get("county")),
        _deal_type_slug(deal.get("deal_type")),
        _identifier_slug(deal),
    )
|
|
|
|
|
|
def ensure_property_folder(deal: dict) -> Path:
    """Create the deal's folder layout and return its path.

    Creates the deal folder, one directory per entry in ``SUBFOLDERS`` and,
    only when they are missing, a ``.meta.json`` seeded from the deal fields
    and a ``notes.md`` stub. Existing files are never overwritten, so the
    call can be repeated safely.
    """
    folder = get_property_folder(deal)
    folder.mkdir(parents=True, exist_ok=True)
    for name in SUBFOLDERS:
        folder.joinpath(name).mkdir(exist_ok=True)

    meta_file = folder / ".meta.json"
    if not meta_file.exists():
        # Fields copied under the same key; "deal_id" is taken from "id".
        copied_keys = (
            "deal_hash",
            "source",
            "address",
            "county",
            "state",
            "deal_type",
            "case_number",
            "parcel_id",
        )
        meta = {"deal_id": deal.get("id")}
        meta.update((key, deal.get(key)) for key in copied_keys)
        meta["created_at"] = datetime.now(timezone.utc).isoformat()
        meta["last_dd_run_at"] = None
        meta["last_dd_kind"] = None  # "pre_dd" | "full_dd"
        meta_file.write_text(
            json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8"
        )

    # The notes stub guarantees the user always has a place to write.
    notes_file = folder / "notes.md"
    if not notes_file.exists():
        title = deal.get("address") or "(sin direccion)"
        created = datetime.now(timezone.utc).isoformat()
        notes_file.write_text(
            f"# Notas — {title}\n\n"
            f"Carpeta creada: {created}\n\n"
            "## Escribí acá lo que quieras recordar sobre este deal.\n\n",
            encoding="utf-8",
        )

    return folder
|
|
|
|
|
|
def write_meta(folder: Path, **updates) -> None:
    """Merge ``updates`` into ``folder/.meta.json``, keeping existing fields.

    The current file is loaded when present. A missing, unreadable or
    malformed file is treated as empty, and so is a file whose top-level JSON
    value is not an object (calling ``update`` on e.g. a list would fail).
    ``last_updated_at`` is always refreshed with the current UTC time.

    Args:
        folder: Deal folder that holds (or will hold) ``.meta.json``.
        **updates: Fields to set or overwrite.

    Raises:
        OSError: If the file cannot be written (e.g. ``folder`` is missing).
        TypeError: If a value in ``updates`` is not JSON serializable.
    """
    meta_path = folder / ".meta.json"
    try:
        loaded = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, bad encoding or invalid JSON: start empty.
        loaded = {}
    data = loaded if isinstance(loaded, dict) else {}
    data.update(updates)
    data["last_updated_at"] = datetime.now(timezone.utc).isoformat()
    meta_path.write_text(
        json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
    )
|
|
|
|
|
|
def save_json(path: Path, data) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON.

    Missing parent directories are created first. Non-ASCII text is written
    as is, and values the JSON encoder cannot handle are converted with
    ``str``.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, default=str, ensure_ascii=False, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(payload)
|
|
|
|
|
|
def save_markdown(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` as UTF-8, creating parent directories first."""
    destination = path
    destination.parent.mkdir(exist_ok=True, parents=True)
    destination.write_text(text, encoding="utf-8")
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# Listing / discovery
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
def list_property_folders() -> list[dict]:
    """List every existing property folder under ``PROPERTIES_ROOT``.

    Walks the fixed layout ``STATE/COUNTY/DEAL_TYPE/IDENT`` and skips
    anything that is not a directory at each level. Each property folder is
    scanned once to obtain both its file count and its newest modification
    time.

    Returns:
        One dict per property folder, most recently modified first, with the
        keys ``state``, ``county``, ``deal_type``, ``identifier``,
        ``folder_path`` (str), ``meta`` (parsed ``.meta.json``; ``{}`` when
        the file is missing, unreadable or not a JSON object), ``file_count``
        (files at any depth) and ``last_modified_at`` (ISO-8601 UTC of the
        newest file, or of the folder itself when it holds no files).
        An empty list when ``PROPERTIES_ROOT`` is not a directory.
    """
    if not PROPERTIES_ROOT.is_dir():
        return []

    def _subdirs(parent: Path):
        # Directories directly below ``parent``; plain files are ignored.
        return (child for child in parent.iterdir() if child.is_dir())

    def _read_meta(folder: Path) -> dict:
        # Best effort: any unusable .meta.json is reported as an empty dict.
        try:
            loaded = json.loads((folder / ".meta.json").read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return {}
        return loaded if isinstance(loaded, dict) else {}

    rows = []  # (newest mtime as float, entry dict)
    for state_dir in _subdirs(PROPERTIES_ROOT):
        for county_dir in _subdirs(state_dir):
            for dtype_dir in _subdirs(county_dir):
                for ident_dir in _subdirs(dtype_dir):
                    # Single recursive pass feeds both the count and the max.
                    mtimes = [
                        p.stat().st_mtime
                        for p in ident_dir.rglob("*")
                        if p.is_file()
                    ]
                    last_mod = max(mtimes) if mtimes else ident_dir.stat().st_mtime
                    rows.append((last_mod, {
                        "state": state_dir.name,
                        "county": county_dir.name,
                        "deal_type": dtype_dir.name,
                        "identifier": ident_dir.name,
                        "folder_path": str(ident_dir),
                        "meta": _read_meta(ident_dir),
                        "file_count": len(mtimes),
                        "last_modified_at": datetime.fromtimestamp(
                            last_mod, timezone.utc
                        ).isoformat(),
                    }))

    # Order by the numeric timestamp rather than its formatted string.
    rows.sort(key=lambda row: row[0], reverse=True)
    return [entry for _, entry in rows]
|
|
|
|
|
|
def find_property_folder_by_deal_id(deal_id: int) -> Optional[Path]:
    """Find a property's folder by the ``deal_id`` stored in its ``.meta.json``.

    Scans the entries returned by ``list_property_folders()`` in order and
    returns the first folder whose metadata carries an equal ``deal_id``.

    Args:
        deal_id: Identifier to look for; compared with ``==`` against the
            stored value.

    Returns:
        The folder path, or ``None`` when no folder matches.
    """
    for entry in list_property_folders():
        meta = entry.get("meta")
        # A .meta.json can hold valid JSON that is not an object (e.g. a
        # list); skip such entries instead of failing on ``.get``.
        if isinstance(meta, dict) and meta.get("deal_id") == deal_id:
            return Path(entry["folder_path"])
    return None
|