683 lines
28 KiB
Python
683 lines
28 KiB
Python
"""Property value fetcher para Wave 2 (ValueEstimator).
|
|
|
|
Objetivo: estimar valor real de una propiedad combinando:
|
|
1. Tax assessed value (gratis, county-specific scraping)
|
|
2. Comparables recently sold (Firecrawl, MAX 5 — OPT-IN para no quemar credits)
|
|
3. Deductions automaticas por edad del inmueble (AC, roof, plumbing, panel)
|
|
|
|
OUTPUT consolidado:
|
|
{
|
|
"listing_price": int,
|
|
"tax_assessed_value": int | None,
|
|
"comps_used": [{address, sold_price, sold_date, sqft, $/sqft}, ...],
|
|
"estimated_value": {"low": int, "mid": int, "high": int, "confidence": str},
|
|
"price_per_sqft_comps_avg": float | None,
|
|
"price_per_sqft_subject": float,
|
|
"overpriced_pct": float | None,
|
|
"inflation_score": float, # 0-10
|
|
"deductions": {"ac": int, "roof": int, "plumbing": int, "panel": int, "total": int},
|
|
"market_trend": {"direction": str, "evidence": str},
|
|
"sources_used": [...],
|
|
"fetch_errors": [...],
|
|
}
|
|
|
|
FAIL-SOFT:
|
|
- Sin Firecrawl key o ENABLE_FIRECRAWL_COMPS=false → comps_used=[], confidence baja
|
|
- Sin tax assessed → tax_assessed_value=None, fallback a comps
|
|
- Sin nada → estimacion basada SOLO en deductions vs listing
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Optional
|
|
|
|
from .base import FetcherError, USER_AGENT, DEFAULT_TIMEOUT
|
|
|
|
|
|
# ─── Deductions por edad del inmueble (Florida real estate norms) ──────────
|
|
DEDUCTION_AC = 6_000 # AC central viejo (<2010) si no hay evidencia de uno nuevo
|
|
DEDUCTION_ROOF = 10_000 # Roof shingle viejo (<2005)
|
|
DEDUCTION_PLUMBING_POLYBUTYLENE = 12_000 # Polybutylene plumbing risk (1978-1995 FL)
|
|
DEDUCTION_ELECTRICAL_PANEL = 5_000 # Federal Pacific / Zinsco panels (<1990)
|
|
|
|
# ─── Keywords para detectar items renovados (suprimir deduccion correspondiente) ──
|
|
# Bug fix 2026-05-15: el sistema antes solo chequeaba "new ac"/"ac nuevo".
|
|
# Ahora detecta variantes reales que aparecen en listings: "BRAND NEW ROOF",
|
|
# "Updated/Remodeled", "Fully updated throughout", "AC replaced 2023", etc.
|
|
NEW_ITEM_KEYWORDS = {
|
|
"ac": [
|
|
"new ac", "new a/c", "new hvac", "new a.c.", "new air conditioning",
|
|
"ac replaced", "a/c replaced", "hvac replaced", "ac unit replaced",
|
|
"new air handler", "ac nuevo", "a/c nuevo", "hvac nuevo",
|
|
"ac (20", "ac 20", "a/c (20", # "AC (2023)" or "AC 2023"
|
|
"newer ac", "newer a/c", "newer hvac",
|
|
"recently replaced ac", "recently replaced a/c", "recently replaced hvac",
|
|
],
|
|
"roof": [
|
|
"new roof", "brand new roof", "brand-new roof", "newer roof",
|
|
"roof replaced", "roof recently replaced", "recent roof",
|
|
"roof 20", "roof (20", "new shingles", "metal roof installed",
|
|
"roof installed 20", "roof nuevo", "techo nuevo",
|
|
"replaced roof", "roof was replaced", "roof replacement",
|
|
],
|
|
"plumbing": [
|
|
"re-piped", "repiped", "re piped", "pex plumbing", "new plumbing",
|
|
"plumbing replaced", "plumbing nuevo", "fully repiped",
|
|
"copper plumbing", "plumbing updated", "new pipes",
|
|
],
|
|
"panel": [
|
|
"new panel", "panel upgraded", "panel replaced", "new electrical",
|
|
"electrical updated", "200 amp", "new wiring", "rewired",
|
|
"panel nuevo", "panel electrico nuevo", "upgraded electrical",
|
|
],
|
|
}
|
|
|
|
# Keywords globales que indican "renovada completa" → CERO deducciones
|
|
RENOVATED_GLOBAL_KEYWORDS = [
|
|
"updated/remodeled", # Zillow explicit condition tag
|
|
"fully updated", "fully renovated", "completely renovated", "completely remodeled",
|
|
"move-in ready", "move in ready", "turnkey", "turn key", "turn-key",
|
|
"totally renovated", "totally updated",
|
|
"renovacion completa", "completamente renovada", "lista para mudarse",
|
|
"newly renovated", "newly remodeled",
|
|
"fully remodeled",
|
|
]
|
|
|
|
# Keywords de condition_status que indican propiedad renovada
|
|
RENOVATED_CONDITION_VALUES = {
|
|
"updated/remodeled", "remodeled", "renovated", "updated",
|
|
"new construction", "newly built",
|
|
}
|
|
|
|
|
|
def _matches_any_keyword(text: str, keywords: list[str]) -> Optional[str]:
|
|
"""Returns first matched keyword (case-insensitive substring), or None."""
|
|
if not text:
|
|
return None
|
|
text_lower = text.lower()
|
|
for kw in keywords:
|
|
if kw.lower() in text_lower:
|
|
return kw
|
|
return None
|
|
|
|
|
|
# ─── Comps estimation ──────────────────────────────────────────────────────
|
|
COMPS_MAX_COUNT = 5
|
|
COMPS_LOOKBACK_DAYS = 180
|
|
COMPS_SQFT_TOLERANCE_PCT = 0.15
|
|
PRICE_LOW_PCT = 0.92 # estimated_value['low'] = mid * 0.92
|
|
PRICE_HIGH_PCT = 1.08 # estimated_value['high'] = mid * 1.08
|
|
|
|
|
|
def calculate_age_deductions(
|
|
year_built: int,
|
|
photo_findings_text: str = "",
|
|
listing_description: str = "",
|
|
condition_status: str = "",
|
|
features_special: Optional[list] = None,
|
|
) -> dict:
|
|
"""Deducciones automaticas segun edad de la propiedad.
|
|
|
|
BUG FIX 2026-05-15: antes solo chequeaba keywords minimos en photo_findings_text.
|
|
Ahora respeta condition_status, listing_description, y features_special tags.
|
|
|
|
Args:
|
|
year_built: año de construccion. Si <= 0 → ZERO deductions.
|
|
photo_findings_text: output del PhotoInspector (puede mencionar AC nuevo, etc.)
|
|
listing_description: full description del listing Zillow/Realtor
|
|
condition_status: Zillow "condition" tag (e.g. "Updated/Remodeled", "Original")
|
|
features_special: array de tags del "What's special" Zillow box
|
|
(e.g. ["BRAND NEW ROOF", "Fresh paint", "NEW AC"])
|
|
|
|
Returns:
|
|
{ac, roof, plumbing, panel, total, _skipped_global: bool,
|
|
_suppressed_items: [str], _reasons: {item: kw_matched}}
|
|
|
|
Si condition_status indica renovada O description tiene keywords globales
|
|
→ TODO 0, _skipped_global=True.
|
|
|
|
Per-item: si features_special O description menciona NEW ROOF/AC/etc,
|
|
suprime esa deduccion especifica.
|
|
"""
|
|
deductions = {"ac": 0, "roof": 0, "plumbing": 0, "panel": 0}
|
|
suppressed: list[str] = []
|
|
reasons: dict[str, str] = {}
|
|
|
|
# Combinar todos los textos en uno solo para keyword matching
|
|
combined_text = " ".join([
|
|
photo_findings_text or "",
|
|
listing_description or "",
|
|
" ".join(features_special or []),
|
|
])
|
|
|
|
# 1. CHECK GLOBAL: si condition status es renovada → CERO deducciones
|
|
cs_lower = (condition_status or "").lower().strip()
|
|
if cs_lower in RENOVATED_CONDITION_VALUES:
|
|
return {
|
|
**deductions, "total": 0,
|
|
"_skipped_global": True,
|
|
"_skip_reason": f"condition_status='{condition_status}' (Zillow tag)",
|
|
"_suppressed_items": list(deductions.keys()),
|
|
"_reasons": {k: f"condition={condition_status}" for k in deductions.keys()},
|
|
}
|
|
|
|
# 2. CHECK GLOBAL: si description o features mencionan "fully updated"/"move-in ready"
|
|
global_kw = _matches_any_keyword(combined_text, RENOVATED_GLOBAL_KEYWORDS)
|
|
if global_kw:
|
|
return {
|
|
**deductions, "total": 0,
|
|
"_skipped_global": True,
|
|
"_skip_reason": f"keyword '{global_kw}' detected in listing",
|
|
"_suppressed_items": list(deductions.keys()),
|
|
"_reasons": {k: f"keyword:{global_kw}" for k in deductions.keys()},
|
|
}
|
|
|
|
# 3. PER-ITEM: aplicar deduccion solo si edad gatilla Y no hay keyword item-specific
|
|
if not year_built or year_built <= 0:
|
|
deductions["total"] = 0
|
|
deductions["_skipped_global"] = False
|
|
deductions["_suppressed_items"] = []
|
|
deductions["_reasons"] = {}
|
|
return deductions
|
|
|
|
# AC: edad <2010 y NO hay keyword "new ac"
|
|
if year_built < 2010:
|
|
ac_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["ac"])
|
|
if ac_kw:
|
|
suppressed.append("ac")
|
|
reasons["ac"] = f"keyword:{ac_kw}"
|
|
else:
|
|
deductions["ac"] = DEDUCTION_AC
|
|
|
|
# Roof: edad <2005 y NO hay keyword "new roof"
|
|
if year_built < 2005:
|
|
roof_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["roof"])
|
|
if roof_kw:
|
|
suppressed.append("roof")
|
|
reasons["roof"] = f"keyword:{roof_kw}"
|
|
else:
|
|
deductions["roof"] = DEDUCTION_ROOF
|
|
|
|
# Plumbing polybutylene: edad 1978-1995 y NO hay keyword "repiped"
|
|
if 1978 <= year_built <= 1995:
|
|
pl_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["plumbing"])
|
|
if pl_kw:
|
|
suppressed.append("plumbing")
|
|
reasons["plumbing"] = f"keyword:{pl_kw}"
|
|
else:
|
|
deductions["plumbing"] = DEDUCTION_PLUMBING_POLYBUTYLENE
|
|
|
|
# Electrical panel: edad <1990 y NO hay keyword "new panel"
|
|
if year_built < 1990:
|
|
panel_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS["panel"])
|
|
if panel_kw:
|
|
suppressed.append("panel")
|
|
reasons["panel"] = f"keyword:{panel_kw}"
|
|
else:
|
|
deductions["panel"] = DEDUCTION_ELECTRICAL_PANEL
|
|
|
|
deductions["total"] = sum(v for k, v in deductions.items() if k in ("ac", "roof", "plumbing", "panel"))
|
|
deductions["_skipped_global"] = False
|
|
deductions["_suppressed_items"] = suppressed
|
|
deductions["_reasons"] = reasons
|
|
return deductions
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Tax Assessed Value (county-specific scrapers)
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
def fetch_tax_assessed_miami_dade(address: str) -> Optional[dict]:
|
|
"""Stub para scraper Miami-Dade property appraiser.
|
|
|
|
TODO Wave 2 follow-up: implementar Playwright scraping de:
|
|
https://www.miamidade.gov/pa/property_search.asp
|
|
Steps:
|
|
1. Search by address
|
|
2. Parse results, click property card
|
|
3. Extract assessed_value, market_value, sale_history
|
|
Tiempo estimado: ~30s por lookup. Cache 30 dias.
|
|
|
|
Por ahora retorna None — el agente trabaja con comps + deductions sin tax assessed.
|
|
"""
|
|
return None
|
|
|
|
|
|
# Set explicito de counties que TIENEN scraper real (no stub).
|
|
# Hoy: Broward funciona via pa_broward.py (full PA record extraction).
|
|
# Pendiente: Miami-Dade, Palm Beach, Orange, Hillsborough, Pinellas... (custom adapters)
|
|
# qPublic (~30 rurales) — bloqueado por Cloudflare
|
|
_TAX_ASSESSED_IMPLEMENTED_COUNTIES: set[str] = {
|
|
"broward",
|
|
}
|
|
|
|
|
|
def is_tax_assessed_supported(county_name: Optional[str], state: Optional[str]) -> bool:
|
|
"""True si tenemos scraper real para este county. False si es stub o no esta.
|
|
|
|
El orchestrator usa esto para distinguir 'no implementado' (decisión nuestra)
|
|
de 'not found' (buscamos y no estaba). NO mentir al usuario.
|
|
"""
|
|
if not county_name or state != "FL":
|
|
return False
|
|
cname = county_name.lower().replace(" county", "").strip()
|
|
return cname in _TAX_ASSESSED_IMPLEMENTED_COUNTIES
|
|
|
|
|
|
def fetch_tax_assessed(
|
|
address: str,
|
|
county_name: Optional[str],
|
|
state: Optional[str],
|
|
parcel_id: Optional[str] = None,
|
|
) -> Optional[dict]:
|
|
"""Router por condado para Property Appraiser data.
|
|
|
|
Args:
|
|
address: street address (used by counties whose adapter supports address search)
|
|
county_name: e.g. "Broward", "Miami-Dade"
|
|
state: must be "FL" today
|
|
parcel_id: county-specific folio number (required for Broward; preferred
|
|
for all counties since address matching is often fuzzy on PA sites)
|
|
|
|
Returns:
|
|
Rich dict with legacy keys (assessed_value, market_value, year_built, sqft,
|
|
owner_name, source) plus extended fields when adapter supports them
|
|
(sales_history, mailing_address, tax_breakdown, photo_url, etc.).
|
|
None if county not implemented OR adapter returned empty result.
|
|
|
|
Use is_tax_assessed_supported() to distinguish "not implemented" vs "not found".
|
|
"""
|
|
if not is_tax_assessed_supported(county_name, state):
|
|
return None
|
|
cname = (county_name or "").lower().replace(" county", "").strip()
|
|
|
|
if cname == "broward":
|
|
if not parcel_id:
|
|
# Broward needs folio — we can't do reliable address search yet
|
|
return None
|
|
return _fetch_broward(parcel_id)
|
|
|
|
if "miami-dade" in cname or "miami dade" in cname:
|
|
return fetch_tax_assessed_miami_dade(address)
|
|
|
|
# Defensive: not reachable if is_tax_assessed_supported is in sync
|
|
return None
|
|
|
|
|
|
def _fetch_broward(parcel_id: str) -> Optional[dict]:
|
|
"""Broward adapter: pa_broward.py rich record → legacy contract + extensions."""
|
|
try:
|
|
from data_fetchers.pa_broward import fetch_broward_pa_record
|
|
except ImportError:
|
|
return None
|
|
|
|
rec = fetch_broward_pa_record(parcel_id)
|
|
if not rec or rec.get("errors") and not rec.get("just_value_current"):
|
|
# Broward adapter failed AND no fallback data → return None
|
|
return None
|
|
|
|
cy = rec.get("current_year", {})
|
|
ly = rec.get("last_year", {})
|
|
|
|
# Concatenate owner_name + owner_name_2 if continuation exists (e.g. "BANK OF AMERICA NA TRSTEE" + "% CORP REAL ESTATE ASSMT")
|
|
owner_full = rec.get("owner_name", "") or ""
|
|
if rec.get("owner_name_2"):
|
|
owner_full = f"{owner_full} {rec['owner_name_2']}".strip()
|
|
|
|
# Parse beds/baths from "1 / 3 / 2.50" format (units/beds/baths)
|
|
beds = baths = None
|
|
ubb = (rec.get("units_beds_baths") or "").split("/")
|
|
if len(ubb) >= 3:
|
|
try:
|
|
beds_raw = ubb[1].strip()
|
|
beds = int(beds_raw) if beds_raw and beds_raw.replace(".", "").isdigit() else None
|
|
except (ValueError, IndexError):
|
|
pass
|
|
try:
|
|
baths_raw = ubb[2].strip()
|
|
baths = float(baths_raw) if baths_raw and baths_raw.replace(".", "").isdigit() else None
|
|
except (ValueError, IndexError):
|
|
pass
|
|
|
|
# ─── Legacy contract (back-compat with existing orchestrator/LLM payload) ──
|
|
return {
|
|
# Required legacy keys
|
|
"assessed_value": cy.get("assessed_value") or ly.get("assessed_value"),
|
|
"market_value": cy.get("just_value") or ly.get("just_value"),
|
|
"just_value": cy.get("just_value") or ly.get("just_value"),
|
|
"year_built": rec.get("year_built"),
|
|
"sqft": rec.get("under_air_sqft") or rec.get("adj_bldg_sqft"),
|
|
"beds": beds,
|
|
"baths": baths,
|
|
"owner_name": owner_full,
|
|
"source": "bcpa.net",
|
|
# ─── Extended fields (Property Snapshot Report inputs) ────────────────
|
|
"folio_number": rec.get("folio_number"),
|
|
"mailing_address": rec.get("mailing_address"),
|
|
"situs_address": rec.get("situs_address"),
|
|
"neighborhood": rec.get("neighborhood"),
|
|
"use_code": rec.get("use_code"),
|
|
"millage_code": rec.get("millage_code"),
|
|
"legal_description": rec.get("legal_description"),
|
|
"adj_bldg_sqft": rec.get("adj_bldg_sqft"),
|
|
"under_air_sqft": rec.get("under_air_sqft"),
|
|
"effective_year": rec.get("effective_year"),
|
|
"homestead_active": rec.get("homestead_active", False),
|
|
"taxes_paid_last_year": ly.get("taxes_paid"),
|
|
"tax_year_last": ly.get("tax_year"),
|
|
"tax_year_current": cy.get("tax_year"),
|
|
"current_year_values": cy,
|
|
"last_year_values": ly,
|
|
"two_years_ago_values": rec.get("two_years_ago", {}),
|
|
"tax_breakdown": rec.get("tax_breakdown", {}),
|
|
"sales_history": rec.get("sales_history", []),
|
|
"photo_url": rec.get("photo_url"),
|
|
"source_url": rec.get("source_url"),
|
|
"source_api_url": rec.get("source_api_url"),
|
|
"fetched_at": rec.get("fetched_at"),
|
|
}
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Firecrawl comps (OPT-IN para no quemar credits)
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
def _firecrawl_enabled() -> bool:
|
|
"""Check env flag + key presence."""
|
|
flag = os.getenv("ENABLE_FIRECRAWL_COMPS", "false").lower() == "true"
|
|
has_key = bool(os.getenv("FIRECRAWL_API_KEY", "").strip())
|
|
return flag and has_key
|
|
|
|
|
|
def fetch_zillow_comps(
|
|
zip_code: str,
|
|
beds: int,
|
|
baths: float,
|
|
sqft: int,
|
|
max_count: int = COMPS_MAX_COUNT,
|
|
) -> tuple[list[dict], list[str]]:
|
|
"""Fetch recently sold comps via Firecrawl scrape de Zillow.
|
|
|
|
Returns (comps_list, errors). Comps list capped at max_count.
|
|
Cada comp: {address, sold_price, sold_date_text, sqft, price_per_sqft}
|
|
|
|
OPT-IN: requiere ENABLE_FIRECRAWL_COMPS=true en .env.
|
|
"""
|
|
errors: list[str] = []
|
|
|
|
if not _firecrawl_enabled():
|
|
errors.append(
|
|
"Firecrawl comps deshabilitado. Setear ENABLE_FIRECRAWL_COMPS=true en .env para activar."
|
|
)
|
|
return [], errors
|
|
|
|
try:
|
|
from firecrawl import FirecrawlApp
|
|
except ImportError as e:
|
|
errors.append(f"firecrawl-py no importable: {e}")
|
|
return [], errors
|
|
|
|
api_key = os.getenv("FIRECRAWL_API_KEY", "").strip()
|
|
if not api_key:
|
|
errors.append("FIRECRAWL_API_KEY ausente en .env")
|
|
return [], errors
|
|
|
|
# Zillow recently sold URL para ZIP
|
|
url = f"https://www.zillow.com/homes/recently_sold/{zip_code}_rb/"
|
|
|
|
try:
|
|
app = FirecrawlApp(api_key=api_key)
|
|
# Firecrawl SDK v2+: .scrape() (renamed from legacy .scrape_url()).
|
|
# Returns Document object with .markdown attribute on success.
|
|
result = app.scrape(url, formats=["markdown"])
|
|
if not result or not hasattr(result, "markdown"):
|
|
errors.append("Firecrawl devolvio resultado vacio")
|
|
return [], errors
|
|
md = result.markdown or ""
|
|
except Exception as e:
|
|
errors.append(f"Firecrawl scrape error: {e}")
|
|
return [], errors
|
|
|
|
# Parser best-effort del markdown de Zillow.
|
|
# Buscar bloques con: $price + sqft + address + sold date
|
|
# Patrones tipicos en markdown de Zillow recently sold:
|
|
# "$485,000" ... "1,450 sqft" ... "123 Main St" ... "Sold X/Y/Z"
|
|
comps = _parse_zillow_markdown(md, beds=beds, baths=baths, sqft_target=sqft)
|
|
if not comps:
|
|
errors.append("Firecrawl OK pero parser no extrajo comps (Zillow cambio formato?)")
|
|
|
|
return comps[:max_count], errors
|
|
|
|
|
|
def _parse_zillow_markdown(md: str, beds: int, baths: float, sqft_target: int) -> list[dict]:
|
|
"""Best-effort parser de Zillow markdown.
|
|
|
|
Extrae bloques con price + sqft + address. Tolera variaciones.
|
|
"""
|
|
comps = []
|
|
# Buscar todos los matches de precio + sqft cercanos
|
|
# Pattern: $XXX,XXX (con o sin centavos) seguido en proximidad de "X,XXX sqft" o "X bd"
|
|
price_pattern = re.compile(r"\$([\d,]{3,9})", re.IGNORECASE)
|
|
sqft_pattern = re.compile(r"([\d,]{3,5})\s*sq(?:\.|uare)?\s*ft", re.IGNORECASE)
|
|
sold_pattern = re.compile(r"(sold|vendido)[\s:]+([0-9/.-]+)", re.IGNORECASE)
|
|
bed_pattern = re.compile(r"([\d.]+)\s*(?:bd|bed)", re.IGNORECASE)
|
|
|
|
# Segmentar md en bloques de ~500 chars que probablemente contengan 1 listing
|
|
segments = re.split(r"\n\n+|---+", md)
|
|
for seg in segments:
|
|
if len(seg) < 50 or len(seg) > 2000:
|
|
continue
|
|
prices = price_pattern.findall(seg)
|
|
sqfts = sqft_pattern.findall(seg)
|
|
if not prices or not sqfts:
|
|
continue
|
|
try:
|
|
price = int(prices[0].replace(",", ""))
|
|
sqft = int(sqfts[0].replace(",", ""))
|
|
except (ValueError, IndexError):
|
|
continue
|
|
# Filtro: precio razonable para single home
|
|
if price < 30_000 or price > 5_000_000:
|
|
continue
|
|
if sqft < 400 or sqft > 8_000:
|
|
continue
|
|
# Filtro: sqft cerca del subject (+/- COMPS_SQFT_TOLERANCE_PCT)
|
|
if sqft_target > 0:
|
|
ratio = sqft / sqft_target
|
|
if ratio < (1 - COMPS_SQFT_TOLERANCE_PCT) or ratio > (1 + COMPS_SQFT_TOLERANCE_PCT):
|
|
continue
|
|
|
|
sold_match = sold_pattern.search(seg)
|
|
sold_date = sold_match.group(2) if sold_match else "?"
|
|
|
|
bed_match = bed_pattern.search(seg)
|
|
comp_beds = bed_match.group(1) if bed_match else "?"
|
|
|
|
# Address best-effort: primera linea o primer link
|
|
addr_match = re.search(r"\[([^\]]+(?:St|Ave|Rd|Dr|Ln|Way|Blvd|Ct|Ter|Pl)[^\]]*)\]", seg, re.IGNORECASE)
|
|
address = addr_match.group(1) if addr_match else "(direccion no parseada)"
|
|
|
|
comps.append({
|
|
"address": address,
|
|
"sold_price": price,
|
|
"sold_date": sold_date,
|
|
"sqft": sqft,
|
|
"beds_text": comp_beds,
|
|
"price_per_sqft": round(price / sqft, 2) if sqft > 0 else 0,
|
|
})
|
|
|
|
return comps
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# Estimacion de valor y inflation score
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
def estimate_value_from_comps(comps: list[dict], subject_sqft: int) -> tuple[Optional[int], Optional[float]]:
|
|
"""Calcula valor estimado mid + $/sqft promedio de comps."""
|
|
if not comps or subject_sqft <= 0:
|
|
return None, None
|
|
prices_per_sqft = [c["price_per_sqft"] for c in comps if c.get("price_per_sqft", 0) > 0]
|
|
if not prices_per_sqft:
|
|
return None, None
|
|
avg_ppsqft = sum(prices_per_sqft) / len(prices_per_sqft)
|
|
estimated_mid = int(avg_ppsqft * subject_sqft)
|
|
return estimated_mid, round(avg_ppsqft, 2)
|
|
|
|
|
|
def calculate_inflation_score(listing_price: float, estimated_mid: float) -> float:
|
|
"""Score 0-10 de cuanto el listing esta sobre el valor estimado.
|
|
|
|
0 = listing al valor o por debajo (subvaluado)
|
|
5 = listing 10% sobre valor
|
|
10 = listing 30%+ sobre valor (gravemente inflado)
|
|
"""
|
|
if estimated_mid <= 0:
|
|
return 5.0 # default medium
|
|
overpriced = (listing_price - estimated_mid) / estimated_mid
|
|
if overpriced <= 0:
|
|
return 0.0
|
|
if overpriced >= 0.30:
|
|
return 10.0
|
|
return round(overpriced / 0.30 * 10, 1)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
# API publica
|
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
def fetch_property_value(
|
|
*,
|
|
address: str,
|
|
listing_price: float,
|
|
sqft: int,
|
|
beds: int,
|
|
baths: float,
|
|
year_built: int,
|
|
zip_code: Optional[str] = None,
|
|
county_name: Optional[str] = None,
|
|
state: Optional[str] = None,
|
|
photo_findings_text: str = "",
|
|
listing_description: str = "",
|
|
condition_status: str = "",
|
|
features_special: Optional[list] = None,
|
|
include_firecrawl_comps: Optional[bool] = None,
|
|
) -> dict:
|
|
"""Entry point. Combina tax assessed + comps + deductions en un dict consolidado.
|
|
|
|
Bug fix 2026-05-15: ahora acepta listing_description, condition_status,
|
|
features_special para que calculate_age_deductions pueda detectar listings
|
|
renovados (e.g. condition='Updated/Remodeled' o description='Fully updated
|
|
throughout, BRAND NEW ROOF, NEW AC') y suprimir deducciones falsas.
|
|
|
|
include_firecrawl_comps:
|
|
None → usa el flag ENABLE_FIRECRAWL_COMPS de .env (default false)
|
|
True → fuerza llamada a Firecrawl (consume credits)
|
|
False → omite Firecrawl
|
|
"""
|
|
fetched_at = datetime.now(timezone.utc).isoformat()
|
|
sources_used: list[str] = []
|
|
errors: list[str] = []
|
|
|
|
# 1. Deductions por edad (siempre, gratis) — ahora respeta condition + keywords
|
|
deductions = calculate_age_deductions(
|
|
year_built=year_built,
|
|
photo_findings_text=photo_findings_text,
|
|
listing_description=listing_description,
|
|
condition_status=condition_status,
|
|
features_special=features_special,
|
|
)
|
|
if deductions["total"] > 0:
|
|
sources_used.append("Deductions por edad (heuristica FL)")
|
|
elif deductions.get("_skipped_global"):
|
|
sources_used.append(
|
|
f"Deductions SKIPPED (renovated: {deductions.get('_skip_reason', '?')})"
|
|
)
|
|
|
|
# 2. Tax assessed (stub Miami-Dade por ahora)
|
|
tax_assessed_data = fetch_tax_assessed(address, county_name, state)
|
|
tax_assessed_value = None
|
|
if tax_assessed_data:
|
|
tax_assessed_value = tax_assessed_data.get("assessed_value")
|
|
sources_used.append(f"Tax assessed ({county_name})")
|
|
else:
|
|
errors.append(f"Tax assessed no disponible para {county_name or '?'} (scraper pendiente Wave 2 follow-up)")
|
|
|
|
# 3. Firecrawl comps (opt-in)
|
|
if include_firecrawl_comps is None:
|
|
do_firecrawl = _firecrawl_enabled()
|
|
else:
|
|
do_firecrawl = include_firecrawl_comps
|
|
|
|
comps: list[dict] = []
|
|
if do_firecrawl and zip_code:
|
|
comps, comp_errors = fetch_zillow_comps(zip_code, beds, baths, sqft)
|
|
errors.extend(comp_errors)
|
|
if comps:
|
|
sources_used.append(f"Zillow recently sold ({len(comps)} comps via Firecrawl)")
|
|
|
|
# 4. Calcular valor estimado
|
|
estimated_mid_from_comps, avg_ppsqft = estimate_value_from_comps(comps, sqft)
|
|
price_per_sqft_subject = round(listing_price / sqft, 2) if sqft > 0 else 0
|
|
|
|
# Combinar tax_assessed + comps + deductions para mid estimate
|
|
candidates_mid = []
|
|
if estimated_mid_from_comps:
|
|
candidates_mid.append(estimated_mid_from_comps)
|
|
if tax_assessed_value:
|
|
# Tax assessed en FL suele ser ~85% del market value
|
|
candidates_mid.append(int(tax_assessed_value / 0.85))
|
|
|
|
if candidates_mid:
|
|
estimated_mid = int(sum(candidates_mid) / len(candidates_mid)) - deductions["total"]
|
|
else:
|
|
# Fallback: listing - deductions, con muy low confidence
|
|
estimated_mid = max(0, int(listing_price) - deductions["total"])
|
|
|
|
estimated_mid = max(estimated_mid, 1) # nunca zero
|
|
|
|
estimated_low = int(estimated_mid * PRICE_LOW_PCT)
|
|
estimated_high = int(estimated_mid * PRICE_HIGH_PCT)
|
|
|
|
# Confidence segun fuentes disponibles
|
|
if comps and tax_assessed_value:
|
|
confidence = "high"
|
|
elif comps or tax_assessed_value:
|
|
confidence = "medium"
|
|
else:
|
|
confidence = "low"
|
|
|
|
overpriced_pct = None
|
|
if estimated_mid > 0:
|
|
overpriced_pct = round((listing_price - estimated_mid) / estimated_mid * 100, 1)
|
|
|
|
inflation_score = calculate_inflation_score(listing_price, estimated_mid)
|
|
|
|
return {
|
|
"listing_price": int(listing_price),
|
|
"tax_assessed_value": tax_assessed_value,
|
|
"comps_used": comps,
|
|
"estimated_value": {
|
|
"low": estimated_low,
|
|
"mid": estimated_mid,
|
|
"high": estimated_high,
|
|
"confidence": confidence,
|
|
},
|
|
"price_per_sqft_comps_avg": avg_ppsqft,
|
|
"price_per_sqft_subject": price_per_sqft_subject,
|
|
"overpriced_pct": overpriced_pct,
|
|
"inflation_score": inflation_score,
|
|
"deductions": deductions,
|
|
"market_trend": {
|
|
"direction": "unknown",
|
|
"evidence": "Para detectar tendencia requiere historial de comps (no implementado en MVP)",
|
|
},
|
|
"sources_used": sources_used,
|
|
"fetch_errors": errors,
|
|
"firecrawl_used": do_firecrawl and bool(comps),
|
|
"fetched_at": fetched_at,
|
|
}
|