feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,682 @@
|
||||
"""Property value fetcher para Wave 2 (ValueEstimator).
|
||||
|
||||
Objetivo: estimar valor real de una propiedad combinando:
|
||||
1. Tax assessed value (gratis, county-specific scraping)
|
||||
2. Comparables recently sold (Firecrawl, MAX 5 — OPT-IN para no quemar credits)
|
||||
3. Deductions automaticas por edad del inmueble (AC, roof, plumbing, panel)
|
||||
|
||||
OUTPUT consolidado:
|
||||
{
|
||||
"listing_price": int,
|
||||
"tax_assessed_value": int | None,
|
||||
"comps_used": [{address, sold_price, sold_date, sqft, $/sqft}, ...],
|
||||
"estimated_value": {"low": int, "mid": int, "high": int, "confidence": str},
|
||||
"price_per_sqft_comps_avg": float | None,
|
||||
"price_per_sqft_subject": float,
|
||||
"overpriced_pct": float | None,
|
||||
"inflation_score": float, # 0-10
|
||||
"deductions": {"ac": int, "roof": int, "plumbing": int, "panel": int, "total": int},
|
||||
"market_trend": {"direction": str, "evidence": str},
|
||||
"sources_used": [...],
|
||||
"fetch_errors": [...],
|
||||
}
|
||||
|
||||
FAIL-SOFT:
|
||||
- Sin Firecrawl key o ENABLE_FIRECRAWL_COMPS=false → comps_used=[], confidence baja
|
||||
- Sin tax assessed → tax_assessed_value=None, fallback a comps
|
||||
- Sin nada → estimacion basada SOLO en deductions vs listing
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from .base import FetcherError, USER_AGENT, DEFAULT_TIMEOUT
|
||||
|
||||
|
||||
# ─── Age-based deductions in USD (Florida real-estate norms) ───────────────
# Each amount is subtracted from the estimate when the build year falls in the
# risk window checked by calculate_age_deductions and no replacement keyword
# is found in the listing text.

# Old central AC (built before 2010) with no evidence of a newer unit.
DEDUCTION_AC = 6000

# Old shingle roof (built before 2005).
DEDUCTION_ROOF = 10000

# Polybutylene plumbing risk (Florida homes built 1978-1995).
DEDUCTION_PLUMBING_POLYBUTYLENE = 12000

# Federal Pacific / Zinsco electrical panels (built before 1990).
DEDUCTION_ELECTRICAL_PANEL = 5000
|
||||
|
||||
# ─── Keywords that signal a renovated item (suppress the matching deduction) ──
# Bug fix 2026-05-15: the system used to check only "new ac" / "ac nuevo".
# It now covers variants that show up in real listings: "BRAND NEW ROOF",
# "Updated/Remodeled", "Fully updated throughout", "AC replaced 2023", etc.
#
# Keywords are compared as lowercase substrings, and the first hit in list
# order is the one reported, so the order below is significant.
# NOTE(review): substring matching means short entries such as "ac 20" also
# hit inside longer words (e.g. "hvac 2022") — confirm this is intended.
NEW_ITEM_KEYWORDS = {
    "ac": [
        "new ac",
        "new a/c",
        "new hvac",
        "new a.c.",
        "new air conditioning",
        "ac replaced",
        "a/c replaced",
        "hvac replaced",
        "ac unit replaced",
        "new air handler",
        "ac nuevo",
        "a/c nuevo",
        "hvac nuevo",
        # Year-style mentions: "AC (2023)" or "AC 2023".
        "ac (20",
        "ac 20",
        "a/c (20",
        "newer ac",
        "newer a/c",
        "newer hvac",
        "recently replaced ac",
        "recently replaced a/c",
        "recently replaced hvac",
    ],
    "roof": [
        "new roof",
        "brand new roof",
        "brand-new roof",
        "newer roof",
        "roof replaced",
        "roof recently replaced",
        "recent roof",
        "roof 20",
        "roof (20",
        "new shingles",
        "metal roof installed",
        "roof installed 20",
        "roof nuevo",
        "techo nuevo",
        "replaced roof",
        "roof was replaced",
        "roof replacement",
    ],
    "plumbing": [
        "re-piped",
        "repiped",
        "re piped",
        "pex plumbing",
        "new plumbing",
        "plumbing replaced",
        "plumbing nuevo",
        "fully repiped",
        "copper plumbing",
        "plumbing updated",
        "new pipes",
    ],
    "panel": [
        "new panel",
        "panel upgraded",
        "panel replaced",
        "new electrical",
        "electrical updated",
        "200 amp",
        "new wiring",
        "rewired",
        "panel nuevo",
        "panel electrico nuevo",
        "upgraded electrical",
    ],
}
|
||||
|
||||
# Listing-wide keywords meaning "fully renovated" → ZERO deductions are applied.
# Matched as lowercase substrings; the first hit in list order is reported.
RENOVATED_GLOBAL_KEYWORDS = [
    # Zillow's explicit condition tag.
    "updated/remodeled",
    "fully updated",
    "fully renovated",
    "completely renovated",
    "completely remodeled",
    "move-in ready",
    "move in ready",
    "turnkey",
    "turn key",
    "turn-key",
    "totally renovated",
    "totally updated",
    # Spanish-language listings.
    "renovacion completa",
    "completamente renovada",
    "lista para mudarse",
    "newly renovated",
    "newly remodeled",
    "fully remodeled",
]
|
||||
|
||||
# condition_status values (compared after lower() + strip()) that mark the
# property as renovated, which short-circuits every age deduction.
RENOVATED_CONDITION_VALUES = {
    "updated/remodeled",
    "remodeled",
    "renovated",
    "updated",
    "new construction",
    "newly built",
}
|
||||
|
||||
|
||||
def _matches_any_keyword(text: str, keywords: list[str]) -> Optional[str]:
    """Return the first keyword found in *text*, or None when nothing matches.

    Matching is a case-insensitive substring test, tried in the order the
    keywords are given. The keyword is returned exactly as it appears in
    *keywords* (not lowercased). Empty or None *text* never matches.
    """
    if not text:
        return None
    haystack = text.lower()
    # next() with a default yields the first hit in list order, else None.
    return next((kw for kw in keywords if kw.lower() in haystack), None)
|
||||
|
||||
|
||||
# ─── Comparable-sales ("comps") estimation settings ────────────────────────

# Default cap on the number of comps returned by fetch_zillow_comps.
COMPS_MAX_COUNT = 5

# Lookback window in days.
# NOTE(review): no code visible in this file reads this value — confirm usage.
COMPS_LOOKBACK_DAYS = 180

# A comp is kept only if its sqft is within +/- 15% of the subject's sqft.
COMPS_SQFT_TOLERANCE_PCT = 0.15

# Range around the mid estimate: low = mid * 0.92, high = mid * 1.08.
PRICE_LOW_PCT = 0.92
PRICE_HIGH_PCT = 1.08
|
||||
|
||||
|
||||
def calculate_age_deductions(
    year_built: int,
    photo_findings_text: str = "",
    listing_description: str = "",
    condition_status: str = "",
    features_special: Optional[list] = None,
) -> dict:
    """Compute automatic value deductions based on the age of the property.

    BUG FIX 2026-05-15: this used to check only a minimal keyword set in
    photo_findings_text. It now honours condition_status, listing_description
    and the features_special tags.

    Evaluation order:
      1. If condition_status (lowercased, stripped) is one of
         RENOVATED_CONDITION_VALUES → every deduction is 0.
      2. If any RENOVATED_GLOBAL_KEYWORDS entry appears in the combined text
         (photo findings + description + feature tags) → every deduction is 0.
      3. If year_built is missing or <= 0 → every deduction is 0.
      4. Otherwise each item is deducted when the build year falls in its risk
         window, unless an item-specific keyword from NEW_ITEM_KEYWORDS is
         found in the combined text.

    Args:
        year_built: construction year. None, 0 or negative → zero deductions.
        photo_findings_text: PhotoInspector output (may mention a new AC, etc.).
        listing_description: full listing description (Zillow/Realtor).
        condition_status: Zillow "condition" tag
            (e.g. "Updated/Remodeled", "Original").
        features_special: tags from Zillow's "What's special" box
            (e.g. ["BRAND NEW ROOF", "Fresh paint", "NEW AC"]). A bare string
            is treated as a single tag; None entries are ignored and other
            non-string entries are converted with str().

    Returns:
        dict with keys ac, roof, plumbing, panel, total (ints, USD) plus
        _skipped_global (bool), _suppressed_items (list of item names) and
        _reasons ({item: matched keyword or condition}). When a global skip
        (steps 1-2) applies, _skip_reason (str) is present as well.
    """
    deductions = {"ac": 0, "roof": 0, "plumbing": 0, "panel": 0}
    item_names = list(deductions)

    # Normalise the tags before joining: str.join raises TypeError on
    # non-string entries, and joining a bare string would interleave spaces
    # between its characters so that no keyword could ever match.
    if isinstance(features_special, str):
        feature_tags = [features_special]
    else:
        feature_tags = [str(tag) for tag in (features_special or []) if tag is not None]

    # One text blob so every keyword check sees all three sources.
    combined_text = " ".join([
        photo_findings_text or "",
        listing_description or "",
        " ".join(feature_tags),
    ])

    # 1. Global check: the condition tag says the property is renovated.
    cs_lower = (condition_status or "").lower().strip()
    if cs_lower in RENOVATED_CONDITION_VALUES:
        return {
            **deductions,
            "total": 0,
            "_skipped_global": True,
            "_skip_reason": f"condition_status='{condition_status}' (Zillow tag)",
            "_suppressed_items": item_names,
            "_reasons": {item: f"condition={condition_status}" for item in item_names},
        }

    # 2. Global check: the text mentions "fully updated", "move-in ready", etc.
    global_kw = _matches_any_keyword(combined_text, RENOVATED_GLOBAL_KEYWORDS)
    if global_kw:
        return {
            **deductions,
            "total": 0,
            "_skipped_global": True,
            "_skip_reason": f"keyword '{global_kw}' detected in listing",
            "_suppressed_items": item_names,
            "_reasons": {item: f"keyword:{global_kw}" for item in item_names},
        }

    # 3. Without a usable build year there is nothing to base a deduction on.
    if not year_built or year_built <= 0:
        deductions["total"] = 0
        deductions["_skipped_global"] = False
        deductions["_suppressed_items"] = []
        deductions["_reasons"] = {}
        return deductions

    # 4. Per-item rules: (item, build year is in the risk window, amount).
    rules = (
        ("ac", year_built < 2010, DEDUCTION_AC),
        ("roof", year_built < 2005, DEDUCTION_ROOF),
        ("plumbing", 1978 <= year_built <= 1995, DEDUCTION_PLUMBING_POLYBUTYLENE),
        ("panel", year_built < 1990, DEDUCTION_ELECTRICAL_PANEL),
    )
    suppressed: list[str] = []
    reasons: dict[str, str] = {}
    for item, in_risk_window, amount in rules:
        if not in_risk_window:
            continue
        matched_kw = _matches_any_keyword(combined_text, NEW_ITEM_KEYWORDS[item])
        if matched_kw:
            # The listing says this item was replaced → no deduction.
            suppressed.append(item)
            reasons[item] = f"keyword:{matched_kw}"
        else:
            deductions[item] = amount

    deductions["total"] = sum(deductions[item] for item in item_names)
    deductions["_skipped_global"] = False
    deductions["_suppressed_items"] = suppressed
    deductions["_reasons"] = reasons
    return deductions
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# Tax Assessed Value (county-specific scrapers)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def fetch_tax_assessed_miami_dade(address: str) -> Optional[dict]:
    """Placeholder for the Miami-Dade property appraiser scraper.

    Not implemented yet: always returns None, so callers fall back to
    comps + deductions without a tax-assessed value.

    TODO Wave 2 follow-up: implement Playwright scraping of
        https://www.miamidade.gov/pa/property_search.asp
    Planned steps:
        1. Search by address.
        2. Parse the results and open the property card.
        3. Extract assessed_value, market_value and sale_history.
    Estimated time: ~30s per lookup. Cache for 30 days.

    Args:
        address: street address to look up (currently unused).
    """
    return None
|
||||
|
||||
|
||||
# Explicit set of counties that HAVE a real scraper (not a stub).
# Today: Broward works via pa_broward.py (full PA record extraction).
# Pending: Miami-Dade, Palm Beach, Orange, Hillsborough, Pinellas...
# (custom adapters); qPublic (~30 rural counties) is blocked by Cloudflare.
# Entries are lowercase and carry no " county" suffix, which is the form
# is_tax_assessed_supported normalises county names to before the lookup.
_TAX_ASSESSED_IMPLEMENTED_COUNTIES: set[str] = {"broward"}
|
||||
|
||||
|
||||
def is_tax_assessed_supported(county_name: Optional[str], state: Optional[str]) -> bool:
    """Tell whether a real (non-stub) tax-assessed scraper exists for a county.

    The orchestrator uses this to tell "not implemented" (our decision) apart
    from "not found" (we searched and got nothing), so the user is never
    misled about which one happened.

    Args:
        county_name: e.g. "Broward" or "Broward County"; case-insensitive.
        state: must be exactly "FL" for any county to count as supported.

    Returns:
        True only when the state is "FL" and the normalised county name is in
        _TAX_ASSESSED_IMPLEMENTED_COUNTIES.
    """
    if county_name and state == "FL":
        # Normalise "Broward County" / " BROWARD " → "broward".
        normalized = county_name.lower().replace(" county", "").strip()
        return normalized in _TAX_ASSESSED_IMPLEMENTED_COUNTIES
    return False
|
||||
|
||||
|
||||
def fetch_tax_assessed(
    address: str,
    county_name: Optional[str],
    state: Optional[str],
    parcel_id: Optional[str] = None,
) -> Optional[dict]:
    """Route a Property Appraiser lookup to the adapter for the given county.

    Args:
        address: street address (used by counties whose adapter supports
            address search).
        county_name: e.g. "Broward", "Miami-Dade".
        state: must be "FL" today.
        parcel_id: county-specific folio number (required for Broward;
            preferred for all counties since address matching is often fuzzy
            on PA sites).

    Returns:
        Rich dict with legacy keys (assessed_value, market_value, year_built,
        sqft, owner_name, source) plus extended fields when the adapter
        supports them (sales_history, mailing_address, tax_breakdown,
        photo_url, etc.).
        None if the county is not implemented OR the adapter returned nothing.

    Use is_tax_assessed_supported() to tell "not implemented" from "not found".
    """
    if not is_tax_assessed_supported(county_name, state):
        return None

    county_key = (county_name or "").lower().replace(" county", "").strip()

    if county_key == "broward":
        # Broward needs the folio — there is no reliable address search yet.
        return _fetch_broward(parcel_id) if parcel_id else None

    if any(alias in county_key for alias in ("miami-dade", "miami dade")):
        return fetch_tax_assessed_miami_dade(address)

    # Defensive: not reachable while is_tax_assessed_supported stays in sync.
    return None
|
||||
|
||||
|
||||
def _parse_plain_number(raw: str) -> Optional[float]:
    """Return *raw* as a float when it is a plain non-negative decimal, else None."""
    text = raw.strip()
    # Digits with optional dots only: rejects signs, exponents, "nan", "inf".
    if not text or not text.replace(".", "").isdigit():
        return None
    try:
        return float(text)
    except ValueError:
        # e.g. "2.5.0" passes the digit guard but is not a valid number.
        return None


def _fetch_broward(parcel_id: str) -> Optional[dict]:
    """Broward adapter: pa_broward.py rich record → legacy contract + extensions.

    Args:
        parcel_id: Broward folio number passed to fetch_broward_pa_record.

    Returns:
        Dict with the legacy keys (assessed_value, market_value, just_value,
        year_built, sqft, beds, baths, owner_name, source) plus the extended
        Property Snapshot fields copied from the record. Current-year values
        are preferred, falling back to last year's when missing/falsy.
        None when the pa_broward module cannot be imported, the record is
        empty, or the record reports errors and has no "just_value_current".
    """
    try:
        from data_fetchers.pa_broward import fetch_broward_pa_record
    except ImportError:
        return None

    rec = fetch_broward_pa_record(parcel_id)
    if not rec:
        return None
    # Broward adapter failed AND no fallback data → give up.
    # NOTE(review): this reads "just_value_current" while the values below come
    # from current_year["just_value"] — confirm the key against pa_broward.
    if rec.get("errors") and not rec.get("just_value_current"):
        return None

    # `or {}` also covers a key that is present with a None value; the
    # .get(key, {}) default only applies when the key is missing altogether.
    cy = rec.get("current_year") or {}
    ly = rec.get("last_year") or {}

    # Concatenate owner_name + owner_name_2 if a continuation exists
    # (e.g. "BANK OF AMERICA NA TRSTEE" + "% CORP REAL ESTATE ASSMT").
    owner_full = rec.get("owner_name", "") or ""
    if rec.get("owner_name_2"):
        owner_full = f"{owner_full} {rec['owner_name_2']}".strip()

    # Parse beds/baths from the "1 / 3 / 2.50" format (units/beds/baths).
    beds = baths = None
    ubb = (rec.get("units_beds_baths") or "").split("/")
    if len(ubb) >= 3:
        beds_value = _parse_plain_number(ubb[1])
        # Accept "3" and "3.0"; a fractional bed count is treated as unparseable.
        if beds_value is not None and beds_value.is_integer():
            beds = int(beds_value)
        baths = _parse_plain_number(ubb[2])

    return {
        # ─── Legacy contract (back-compat with orchestrator/LLM payload) ─────
        "assessed_value": cy.get("assessed_value") or ly.get("assessed_value"),
        "market_value": cy.get("just_value") or ly.get("just_value"),
        "just_value": cy.get("just_value") or ly.get("just_value"),
        "year_built": rec.get("year_built"),
        "sqft": rec.get("under_air_sqft") or rec.get("adj_bldg_sqft"),
        "beds": beds,
        "baths": baths,
        "owner_name": owner_full,
        "source": "bcpa.net",
        # ─── Extended fields (Property Snapshot Report inputs) ───────────────
        "folio_number": rec.get("folio_number"),
        "mailing_address": rec.get("mailing_address"),
        "situs_address": rec.get("situs_address"),
        "neighborhood": rec.get("neighborhood"),
        "use_code": rec.get("use_code"),
        "millage_code": rec.get("millage_code"),
        "legal_description": rec.get("legal_description"),
        "adj_bldg_sqft": rec.get("adj_bldg_sqft"),
        "under_air_sqft": rec.get("under_air_sqft"),
        "effective_year": rec.get("effective_year"),
        "homestead_active": rec.get("homestead_active", False),
        "taxes_paid_last_year": ly.get("taxes_paid"),
        "tax_year_last": ly.get("tax_year"),
        "tax_year_current": cy.get("tax_year"),
        "current_year_values": cy,
        "last_year_values": ly,
        "two_years_ago_values": rec.get("two_years_ago", {}),
        "tax_breakdown": rec.get("tax_breakdown", {}),
        "sales_history": rec.get("sales_history", []),
        "photo_url": rec.get("photo_url"),
        "source_url": rec.get("source_url"),
        "source_api_url": rec.get("source_api_url"),
        "fetched_at": rec.get("fetched_at"),
    }
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# Firecrawl comps (OPT-IN para no quemar credits)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _firecrawl_enabled() -> bool:
    """Return True only when Firecrawl comps are opted in AND an API key is set.

    Requires ENABLE_FIRECRAWL_COMPS to equal "true" (case-insensitive; the
    default is "false") and FIRECRAWL_API_KEY to be non-blank.
    """
    if os.getenv("ENABLE_FIRECRAWL_COMPS", "false").lower() != "true":
        return False
    return bool(os.getenv("FIRECRAWL_API_KEY", "").strip())
|
||||
|
||||
|
||||
def fetch_zillow_comps(
    zip_code: str,
    beds: int,
    baths: float,
    sqft: int,
    max_count: int = COMPS_MAX_COUNT,
) -> tuple[list[dict], list[str]]:
    """Fetch recently sold comps by scraping Zillow through Firecrawl.

    OPT-IN: requires ENABLE_FIRECRAWL_COMPS=true and FIRECRAWL_API_KEY in the
    environment; otherwise nothing is fetched and an explanatory error is
    returned. Fails soft: every failure is reported through the errors list
    and never raised.

    Args:
        zip_code: ZIP code whose "recently sold" Zillow page is scraped.
        beds: subject bedroom count (forwarded to the markdown parser).
        baths: subject bathroom count (forwarded to the markdown parser).
        sqft: subject living area, used by the parser as the size target.
        max_count: maximum number of comps to return; values <= 0 yield none.

    Returns:
        (comps, errors). Each comp is a dict with the keys address,
        sold_price, sold_date, sqft, beds_text and price_per_sqft.
    """
    errors: list[str] = []

    if not _firecrawl_enabled():
        errors.append(
            "Firecrawl comps deshabilitado. Setear ENABLE_FIRECRAWL_COMPS=true en .env para activar."
        )
        return [], errors

    try:
        from firecrawl import FirecrawlApp
    except ImportError as e:
        errors.append(f"firecrawl-py no importable: {e}")
        return [], errors

    api_key = os.getenv("FIRECRAWL_API_KEY", "").strip()
    if not api_key:
        errors.append("FIRECRAWL_API_KEY ausente en .env")
        return [], errors

    # Zillow "recently sold" URL for the ZIP code.
    url = f"https://www.zillow.com/homes/recently_sold/{zip_code}_rb/"

    try:
        app = FirecrawlApp(api_key=api_key)
        # Firecrawl SDK v2+: .scrape() (renamed from legacy .scrape_url()).
        # Returns a Document object with a .markdown attribute on success.
        result = app.scrape(url, formats=["markdown"])
        if not result or not hasattr(result, "markdown"):
            errors.append("Firecrawl devolvio resultado vacio")
            return [], errors
        md = result.markdown or ""
    except Exception as e:
        # Deliberately broad: this is the fail-soft boundary around a
        # third-party SDK; the failure is surfaced via the errors list.
        errors.append(f"Firecrawl scrape error: {e}")
        return [], errors

    # Best-effort parse of the Zillow markdown. Typical pattern:
    #   "$485,000" ... "1,450 sqft" ... "123 Main St" ... "Sold X/Y/Z"
    comps = _parse_zillow_markdown(md, beds=beds, baths=baths, sqft_target=sqft)
    if not comps:
        errors.append("Firecrawl OK pero parser no extrajo comps (Zillow cambio formato?)")

    # Clamp at 0: a negative slice bound would drop comps from the END of the
    # list instead of capping how many are returned.
    return comps[:max(0, max_count)], errors
|
||||
|
||||
|
||||
# Compiled once at import time rather than on every parser call.
# Prices and areas capture the WHOLE number (at least 3 characters). A bounded
# quantifier would truncate long values — "$12,500,000" read as 1,250,000, or
# "12,000 sqft" read as 2,000 — and let them slip past the range filters.
_ZILLOW_PRICE_RE = re.compile(r"\$(\d[\d,]{2,})")
_ZILLOW_SQFT_RE = re.compile(r"(?<![\d,])(\d[\d,]{2,})\s*sq(?:\.|uare)?\s*ft", re.IGNORECASE)
_ZILLOW_SOLD_RE = re.compile(r"(sold|vendido)[\s:]+([0-9/.-]+)", re.IGNORECASE)
_ZILLOW_BED_RE = re.compile(r"([\d.]+)\s*(?:bd|bed)", re.IGNORECASE)
_ZILLOW_ADDR_RE = re.compile(
    r"\[([^\]]+(?:St|Ave|Rd|Dr|Ln|Way|Blvd|Ct|Ter|Pl)[^\]]*)\]", re.IGNORECASE
)


def _parse_zillow_markdown(md: str, beds: int, baths: float, sqft_target: int) -> list[dict]:
    """Best-effort parser for Zillow "recently sold" markdown.

    Splits the markdown on blank lines / horizontal rules and, for each
    segment of 50-2000 characters, uses the first price and the first sqft
    figure it finds. Tolerates format variations; unparseable segments are
    skipped silently.

    Args:
        md: markdown text of the page.
        beds: subject bedroom count. Currently unused (no bed filter is
            applied); accepted for interface compatibility.
        baths: subject bathroom count. Currently unused, as above.
        sqft_target: subject living area. When > 0, comps whose sqft differs
            by more than COMPS_SQFT_TOLERANCE_PCT are dropped; <= 0 disables
            the size filter.

    Returns:
        List of dicts with the keys address, sold_price, sold_date, sqft,
        beds_text and price_per_sqft, in document order. sold_date and
        beds_text are "?" when not found.
    """
    comps = []
    # Each segment is expected to hold roughly one listing.
    for seg in re.split(r"\n\n+|---+", md):
        if len(seg) < 50 or len(seg) > 2000:
            continue
        prices = _ZILLOW_PRICE_RE.findall(seg)
        sqfts = _ZILLOW_SQFT_RE.findall(seg)
        if not prices or not sqfts:
            continue
        try:
            price = int(prices[0].replace(",", ""))
            sqft = int(sqfts[0].replace(",", ""))
        except ValueError:
            continue

        # Plausibility filters for a single-family home.
        if price < 30_000 or price > 5_000_000:
            continue
        if sqft < 400 or sqft > 8_000:
            continue

        # Size filter: sqft within +/- COMPS_SQFT_TOLERANCE_PCT of the subject.
        if sqft_target > 0:
            ratio = sqft / sqft_target
            if ratio < (1 - COMPS_SQFT_TOLERANCE_PCT) or ratio > (1 + COMPS_SQFT_TOLERANCE_PCT):
                continue

        sold_match = _ZILLOW_SOLD_RE.search(seg)
        bed_match = _ZILLOW_BED_RE.search(seg)
        # Address: first markdown link text containing a street suffix.
        addr_match = _ZILLOW_ADDR_RE.search(seg)

        comps.append({
            "address": addr_match.group(1) if addr_match else "(direccion no parseada)",
            "sold_price": price,
            "sold_date": sold_match.group(2) if sold_match else "?",
            "sqft": sqft,
            "beds_text": bed_match.group(1) if bed_match else "?",
            # sqft is at least 400 here, so the division is always safe.
            "price_per_sqft": round(price / sqft, 2),
        })

    return comps
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# Estimacion de valor y inflation score
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def estimate_value_from_comps(comps: list[dict], subject_sqft: int) -> tuple[Optional[int], Optional[float]]:
    """Estimate the subject's mid value from the comps' average price per sqft.

    Args:
        comps: comp dicts; only entries with a positive "price_per_sqft" count.
        subject_sqft: living area of the subject property.

    Returns:
        (estimated_mid, avg_price_per_sqft): the average $/sqft times the
        subject sqft (truncated to int) and the average rounded to 2 decimals.
        (None, None) when there are no comps, subject_sqft <= 0, or no comp
        has a positive price per sqft.
    """
    if not comps or subject_sqft <= 0:
        return None, None

    usable_rates = []
    for comp in comps:
        if comp.get("price_per_sqft", 0) > 0:
            usable_rates.append(comp["price_per_sqft"])
    if not usable_rates:
        return None, None

    mean_rate = sum(usable_rates) / len(usable_rates)
    return int(mean_rate * subject_sqft), round(mean_rate, 2)
|
||||
|
||||
|
||||
def calculate_inflation_score(listing_price: float, estimated_mid: float) -> float:
    """Score from 0 to 10 for how far the listing sits above the estimated value.

    The score grows linearly with the overpricing ratio
    (listing_price - estimated_mid) / estimated_mid:

        0  = listing at or below the estimate (fairly priced / undervalued)
        5  = listing 15% above the estimate
        10 = listing 30% or more above the estimate (severely inflated)

    Returns 5.0 (neutral default) when estimated_mid is <= 0. Otherwise the
    result is rounded to one decimal.
    """
    if estimated_mid <= 0:
        return 5.0  # no usable estimate → neutral default

    premium = (listing_price - estimated_mid) / estimated_mid
    if premium <= 0:
        score = 0.0
    elif premium >= 0.30:
        score = 10.0
    else:
        score = round(premium / 0.30 * 10, 1)
    return score
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# API publica
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def fetch_property_value(
    *,
    address: str,
    listing_price: float,
    sqft: int,
    beds: int,
    baths: float,
    year_built: int,
    zip_code: Optional[str] = None,
    county_name: Optional[str] = None,
    state: Optional[str] = None,
    photo_findings_text: str = "",
    listing_description: str = "",
    condition_status: str = "",
    features_special: Optional[list] = None,
    include_firecrawl_comps: Optional[bool] = None,
    parcel_id: Optional[str] = None,
) -> dict:
    """Entry point: combine tax assessed + comps + deductions into one dict.

    Bug fix 2026-05-15: accepts listing_description, condition_status and
    features_special so calculate_age_deductions can detect renovated
    listings (e.g. condition='Updated/Remodeled' or a description such as
    'Fully updated throughout, BRAND NEW ROOF, NEW AC') and suppress false
    deductions.

    Args:
        address: street address of the subject property.
        listing_price: asking price.
        sqft: living area of the subject.
        beds: bedroom count (forwarded to the comps fetcher).
        baths: bathroom count (forwarded to the comps fetcher).
        year_built: construction year, drives the age deductions.
        zip_code: ZIP code for the comps search; comps are skipped without it.
        county_name: county for the tax-assessed lookup.
        state: state code for the tax-assessed lookup ("FL" today).
        photo_findings_text: PhotoInspector output.
        listing_description: full listing description.
        condition_status: Zillow condition tag.
        features_special: Zillow "What's special" tags.
        include_firecrawl_comps:
            None  → follow the ENABLE_FIRECRAWL_COMPS env flag (default false)
            True  → request the Firecrawl call (consumes credits). Note that
                    fetch_zillow_comps re-checks the env flag and API key
                    itself, so this only has effect when both are set.
            False → skip Firecrawl.
        parcel_id: county folio number forwarded to fetch_tax_assessed.
            Broward — the only county with a real scraper today — cannot be
            looked up without it.

    Returns:
        Consolidated dict with the keys listing_price, tax_assessed_value,
        comps_used, estimated_value {low, mid, high, confidence},
        price_per_sqft_comps_avg, price_per_sqft_subject, overpriced_pct,
        inflation_score, deductions, market_trend, sources_used,
        fetch_errors, firecrawl_used and fetched_at (UTC, ISO 8601).
    """
    fetched_at = datetime.now(timezone.utc).isoformat()
    sources_used: list[str] = []
    errors: list[str] = []

    # 1. Age deductions (always computed, free) — honours condition + keywords.
    deductions = calculate_age_deductions(
        year_built=year_built,
        photo_findings_text=photo_findings_text,
        listing_description=listing_description,
        condition_status=condition_status,
        features_special=features_special,
    )
    if deductions["total"] > 0:
        sources_used.append("Deductions por edad (heuristica FL)")
    elif deductions.get("_skipped_global"):
        sources_used.append(
            f"Deductions SKIPPED (renovated: {deductions.get('_skip_reason', '?')})"
        )

    # 2. Tax assessed. parcel_id must be forwarded: without it the Broward
    #    adapter always returns None and this source could never be used.
    tax_assessed_data = fetch_tax_assessed(address, county_name, state, parcel_id)
    tax_assessed_value = None
    if tax_assessed_data:
        tax_assessed_value = tax_assessed_data.get("assessed_value")
        sources_used.append(f"Tax assessed ({county_name})")
    elif is_tax_assessed_supported(county_name, state):
        # A scraper exists but gave no result: do not report it as "pending".
        errors.append(
            f"Tax assessed sin resultado para {county_name} "
            f"(county soportado; parcel_id {'provisto' if parcel_id else 'ausente'})"
        )
    else:
        errors.append(f"Tax assessed no disponible para {county_name or '?'} (scraper pendiente Wave 2 follow-up)")

    # 3. Firecrawl comps (opt-in).
    if include_firecrawl_comps is None:
        do_firecrawl = _firecrawl_enabled()
    else:
        do_firecrawl = include_firecrawl_comps

    comps: list[dict] = []
    if do_firecrawl and zip_code:
        comps, comp_errors = fetch_zillow_comps(zip_code, beds, baths, sqft)
        errors.extend(comp_errors)
        if comps:
            sources_used.append(f"Zillow recently sold ({len(comps)} comps via Firecrawl)")

    # 4. Estimated value.
    estimated_mid_from_comps, avg_ppsqft = estimate_value_from_comps(comps, sqft)
    price_per_sqft_subject = round(listing_price / sqft, 2) if sqft > 0 else 0

    # Average the available candidates, then subtract the age deductions.
    candidates_mid = []
    if estimated_mid_from_comps:
        candidates_mid.append(estimated_mid_from_comps)
    if tax_assessed_value:
        # Heuristic: FL tax assessed value is usually ~85% of market value.
        candidates_mid.append(int(tax_assessed_value / 0.85))

    if candidates_mid:
        estimated_mid = int(sum(candidates_mid) / len(candidates_mid)) - deductions["total"]
    else:
        # Fallback: listing price minus deductions, with low confidence.
        estimated_mid = max(0, int(listing_price) - deductions["total"])

    # Never zero or negative, so the ratio computations below stay defined.
    estimated_mid = max(estimated_mid, 1)

    estimated_low = int(estimated_mid * PRICE_LOW_PCT)
    estimated_high = int(estimated_mid * PRICE_HIGH_PCT)

    # Confidence depends on how many independent sources were available.
    if comps and tax_assessed_value:
        confidence = "high"
    elif comps or tax_assessed_value:
        confidence = "medium"
    else:
        confidence = "low"

    overpriced_pct = None
    if estimated_mid > 0:
        overpriced_pct = round((listing_price - estimated_mid) / estimated_mid * 100, 1)

    inflation_score = calculate_inflation_score(listing_price, estimated_mid)

    return {
        "listing_price": int(listing_price),
        "tax_assessed_value": tax_assessed_value,
        "comps_used": comps,
        "estimated_value": {
            "low": estimated_low,
            "mid": estimated_mid,
            "high": estimated_high,
            "confidence": confidence,
        },
        "price_per_sqft_comps_avg": avg_ppsqft,
        "price_per_sqft_subject": price_per_sqft_subject,
        "overpriced_pct": overpriced_pct,
        "inflation_score": inflation_score,
        "deductions": deductions,
        "market_trend": {
            "direction": "unknown",
            "evidence": "Para detectar tendencia requiere historial de comps (no implementado en MVP)",
        },
        "sources_used": sources_used,
        "fetch_errors": errors,
        "firecrawl_used": do_firecrawl and bool(comps),
        "fetched_at": fetched_at,
    }
|
||||
Reference in New Issue
Block a user