feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,266 @@
|
||||
"""properties_store.py — File-system store para Due Diligence documents.
|
||||
|
||||
Cada deal scrapeado (especialmente distressed: foreclosure, tax_deed, reo, auction)
|
||||
necesita una carpeta dedicada para acumular documentos del DD:
|
||||
|
||||
properties/{state}/{county_slug}/{deal_type}/{address_or_case_slug}/
|
||||
analysis_report.json — snapshot del último análisis multi-agente
|
||||
briefing_ejecutivo.md — Markdown del briefing del ContextualGlossaryAgent
|
||||
notes.md — Notas libres del usuario (free-form)
|
||||
deeds/ — Escrituras, deed transfers
|
||||
liens/ — Lien inventories, IRS NFTLs, code violations
|
||||
court_records/ — Lis pendens, civil suits, bankruptcy
|
||||
property_appraiser/ — Tax assessment, exemptions, sketches
|
||||
photos/ — Photos descargadas (Zillow CDN, PA sketches, etc.)
|
||||
due_diligence/ — Reporte DD final del DueDiligenceAgent
|
||||
offers/ — Letters of intent, contratos draft, contraoffers
|
||||
.meta.json — Metadata: deal_id, created_at, last_dd_run_at
|
||||
|
||||
API publica:
|
||||
get_property_folder(deal) → Path — devuelve ruta (no crea)
|
||||
ensure_property_folder(deal) → Path — crea estructura completa
|
||||
write_meta(folder, **updates) → None — merges updates into .meta.json
|
||||
list_property_folders() → list[dict] — para UI Mis Propiedades
|
||||
save_json(folder / "X.json", data) → None — helper para persist
|
||||
save_markdown(folder / "X.md", text) → None — helper para persist
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Paths
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# The store lives in a "properties" directory located next to this module.
PROJECT_ROOT = Path(__file__).resolve().parents[0]
PROPERTIES_ROOT = PROJECT_ROOT.joinpath("properties")
|
||||
|
||||
|
||||
# Names of the per-deal document subdirectories, in creation order.
SUBFOLDERS = (
    "deeds", "liens", "court_records", "property_appraiser",
    "photos", "due_diligence", "offers",
)
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Slug helpers
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _slug(s: str, max_len: int = 80) -> str:
|
||||
"""Sanitize a string for filesystem use.
|
||||
|
||||
Keeps [A-Za-z0-9-_.] only, collapses runs of underscores, caps length.
|
||||
"""
|
||||
if not s:
|
||||
return "unknown"
|
||||
s = s.strip()
|
||||
# Replace runs of non-allowed chars with underscore
|
||||
s = re.sub(r"[^A-Za-z0-9\-_.]+", "_", s)
|
||||
s = re.sub(r"_+", "_", s).strip("_-.")
|
||||
return (s[:max_len] or "unknown")
|
||||
|
||||
|
||||
def _state_slug(state: Optional[str]) -> str:
|
||||
s = (state or "FL").strip().upper()
|
||||
return s if re.match(r"^[A-Z]{2}$", s) else "FL"
|
||||
|
||||
|
||||
def _county_slug(county: Optional[str]) -> str:
    """Build the folder-name component for a county.

    A missing/empty value is treated as ``"Unknown"``. Every occurrence of
    the literal ``" County"`` is removed, hyphens and spaces become
    underscores, and the result goes through ``_slug``.
    """
    name = (county or "Unknown").strip()
    name = name.replace(" County", "").strip()
    # "-" is allowed by _slug, so it has to be mapped to "_" explicitly here.
    underscored = name.translate(str.maketrans("- ", "__"))
    return _slug(underscored)
|
||||
|
||||
|
||||
def _deal_type_slug(deal_type: Optional[str]) -> str:
|
||||
dt = (deal_type or "unknown").strip().lower()
|
||||
# Whitelist canon values
|
||||
canon = {"foreclosure", "tax_deed", "auction", "reo", "mls", "off_market"}
|
||||
return dt if dt in canon else "unknown"
|
||||
|
||||
|
||||
def _identifier_slug(deal: dict) -> str:
|
||||
"""Pick the best identifier for the deal's folder name.
|
||||
|
||||
Priority: case_number > parcel_id > address > deal_hash > deal_id.
|
||||
"""
|
||||
case_number = (deal.get("case_number") or "").strip()
|
||||
if case_number:
|
||||
return f"case_{_slug(case_number)}"
|
||||
|
||||
parcel_id = (deal.get("parcel_id") or "").strip()
|
||||
if parcel_id:
|
||||
return f"parcel_{_slug(parcel_id)}"
|
||||
|
||||
address = (deal.get("address") or "").strip()
|
||||
if address:
|
||||
return f"addr_{_slug(address.upper())}"
|
||||
|
||||
deal_hash = (deal.get("deal_hash") or "").strip()
|
||||
if deal_hash:
|
||||
return f"hash_{_slug(deal_hash)}"
|
||||
|
||||
return f"id_{deal.get('id', 'unknown')}"
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Folder API
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def get_property_folder(deal: dict) -> Path:
    """Return the folder path for a deal without creating anything on disk.

    Layout: properties/{STATE}/{County_Slug}/{deal_type}/{identifier_slug}/
    """
    components = (
        _state_slug(deal.get("state")),
        _county_slug(deal.get("county")),
        _deal_type_slug(deal.get("deal_type")),
        _identifier_slug(deal),
    )
    return PROPERTIES_ROOT.joinpath(*components)
|
||||
|
||||
|
||||
def ensure_property_folder(deal: dict) -> Path:
    """Create the deal folder, its subfolders and its starter files.

    Safe to call repeatedly: directories are created with ``exist_ok`` and
    ``.meta.json`` / ``notes.md`` are written only when they do not exist
    yet, so existing files are left untouched.

    Returns:
        The path of the deal folder.
    """
    root = get_property_folder(deal)
    root.mkdir(parents=True, exist_ok=True)

    # One directory per document category.
    for name in SUBFOLDERS:
        root.joinpath(name).mkdir(exist_ok=True)

    # Seed .meta.json on first creation only.
    meta_file = root / ".meta.json"
    if not meta_file.exists():
        record = {"deal_id": deal.get("id")}
        copied_keys = (
            "deal_hash",
            "source",
            "address",
            "county",
            "state",
            "deal_type",
            "case_number",
            "parcel_id",
        )
        for key in copied_keys:
            record[key] = deal.get(key)
        record["created_at"] = datetime.now(timezone.utc).isoformat()
        record["last_dd_run_at"] = None
        record["last_dd_kind"] = None  # "pre_dd" | "full_dd"
        serialized = json.dumps(record, indent=2, ensure_ascii=False)
        meta_file.write_text(serialized, encoding="utf-8")

    # Seed notes.md so the user always has a place to write.
    notes_file = root / "notes.md"
    if not notes_file.exists():
        heading = f"# Notas — {deal.get('address') or '(sin direccion)'}\n\n"
        created = f"Carpeta creada: {datetime.now(timezone.utc).isoformat()}\n\n"
        prompt = "## Escribí acá lo que quieras recordar sobre este deal.\n\n"
        notes_file.write_text(heading + created + prompt, encoding="utf-8")

    return root
|
||||
|
||||
|
||||
def write_meta(folder: Path, **updates) -> None:
    """Merge ``updates`` into ``folder/.meta.json``, keeping existing fields.

    A missing, unreadable or malformed ``.meta.json`` (including valid JSON
    that is not an object) is treated as empty rather than raising, so the
    file is rebuilt from ``updates``. ``last_updated_at`` is always set to
    the current UTC time in ISO-8601 format.

    Args:
        folder: Deal folder that holds (or will hold) ``.meta.json``. It is
            created if it does not exist yet.
        **updates: Fields to add or overwrite in the metadata.

    Raises:
        TypeError: If a value in ``updates`` is not JSON-serializable.
        OSError: If the file cannot be written.
    """
    meta_path = folder / ".meta.json"
    try:
        loaded = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        loaded = {}
    # Valid JSON that is not an object (e.g. a list) cannot be merged into.
    data = loaded if isinstance(loaded, dict) else {}
    data.update(updates)
    data["last_updated_at"] = datetime.now(timezone.utc).isoformat()
    # Serialize before touching the disk so a TypeError leaves the old file
    # intact.
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    folder.mkdir(parents=True, exist_ok=True)
    meta_path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def save_json(path: Path, data) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON.

    Missing parent directories are created first. Values the JSON encoder
    cannot handle natively are stringified via ``default=str``.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False, default=str)
    path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def save_markdown(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` as UTF-8, creating parent directories."""
    destination = path
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Listing / discovery
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _iter_child_dirs(parent: Path):
    """Yield the immediate subdirectories of ``parent``."""
    for child in parent.iterdir():
        if child.is_dir():
            yield child


def _load_folder_meta(ident_dir: Path) -> dict:
    """Return the parsed ``.meta.json`` of a folder, or ``{}`` if unusable."""
    try:
        meta = json.loads((ident_dir / ".meta.json").read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, invalid JSON or invalid UTF-8.
        return {}
    # Callers use dict methods on the result, so reject any other JSON type.
    return meta if isinstance(meta, dict) else {}


def _scan_folder_files(ident_dir: Path) -> tuple:
    """Return ``(file_count, newest_mtime)`` from one recursive pass."""
    file_count = 0
    newest = None
    for path in ident_dir.rglob("*"):
        try:
            if not path.is_file():
                continue
            mtime = path.stat().st_mtime
        except OSError:
            # The entry vanished or became unreadable mid-scan; skip it
            # instead of aborting the whole listing.
            continue
        file_count += 1
        if newest is None or mtime > newest:
            newest = mtime
    if newest is None:
        # No files at all: fall back to the directory's own mtime.
        newest = ident_dir.stat().st_mtime
    return file_count, newest


def list_property_folders() -> list[dict]:
    """List every existing property folder.

    Walks ``PROPERTIES_ROOT`` four levels deep
    (STATE/COUNTY/DEAL_TYPE/IDENT) and describes each IDENT directory.

    Returns:
        A list of dicts with the keys ``state``, ``county``, ``deal_type``,
        ``identifier``, ``folder_path``, ``meta`` (parsed ``.meta.json``,
        ``{}`` when missing or invalid), ``file_count`` (recursive) and
        ``last_modified_at`` (ISO-8601, UTC), sorted newest first. The list
        is empty when ``PROPERTIES_ROOT`` does not exist.
    """
    if not PROPERTIES_ROOT.exists():
        return []

    # Keep the numeric mtime next to each entry so the sort does not rely
    # on the lexical order of ISO strings.
    rows: list[tuple] = []
    for state_dir in _iter_child_dirs(PROPERTIES_ROOT):
        for county_dir in _iter_child_dirs(state_dir):
            for dtype_dir in _iter_child_dirs(county_dir):
                for ident_dir in _iter_child_dirs(dtype_dir):
                    file_count, last_mod = _scan_folder_files(ident_dir)
                    entry = {
                        "state": state_dir.name,
                        "county": county_dir.name,
                        "deal_type": dtype_dir.name,
                        "identifier": ident_dir.name,
                        "folder_path": str(ident_dir),
                        "meta": _load_folder_meta(ident_dir),
                        "file_count": file_count,
                        "last_modified_at": datetime.fromtimestamp(
                            last_mod, timezone.utc
                        ).isoformat(),
                    }
                    rows.append((last_mod, entry))

    # Newest first; the sort is stable, so ties keep traversal order.
    rows.sort(key=lambda row: row[0], reverse=True)
    return [entry for _, entry in rows]
|
||||
|
||||
|
||||
def find_property_folder_by_deal_id(deal_id: int) -> Optional[Path]:
    """Find a property folder by the ``deal_id`` stored in its ``.meta.json``.

    Args:
        deal_id: Value compared for equality against the ``deal_id`` field
            of each folder's metadata.

    Returns:
        The path of the first matching folder in the order produced by
        ``list_property_folders()``, or ``None`` when nothing matches.
    """
    for entry in list_property_folders():
        meta = entry.get("meta")
        # A .meta.json holding valid JSON that is not an object (e.g. a
        # list) has no .get(); skip it rather than raising.
        if isinstance(meta, dict) and meta.get("deal_id") == deal_id:
            return Path(entry["folder_path"])
    return None
|
||||
Reference in New Issue
Block a user