546 lines
21 KiB
Python
546 lines
21 KiB
Python
"""data_fetchers/property_appraiser.py — Unified PA router.
|
|
|
|
Source of truth for ANY property in the USA. Pre-screening calls this
function as its FIRST step to avoid wrong inferences drawn from listing
data (Zillow can be wrong, stale, or incomplete).
|
|
|
|
USAGE:
|
|
from data_fetchers.property_appraiser import fetch_pa_record, is_pa_supported
|
|
|
|
if is_pa_supported(county_name, state):
|
|
record = fetch_pa_record(
|
|
address="2352 SCENIC VIEW CT",
|
|
parcel_id=None,
|
|
county_name="Duval",
|
|
state="FL",
|
|
zip_code="32218",
|
|
)
|
|
|
|
UNIFIED RETURN SCHEMA (each adapter fills in what it can; missing fields = None):
|
|
{
|
|
# Identity
|
|
"parcel_id": str, # County-specific RE#/folio
|
|
"owner_name": str, # Primary owner
|
|
"co_owners": [str], # If multiple
|
|
"mailing_address": str,
|
|
"site_address": str, # Property address
|
|
"owner_address_mismatch": bool, # mailing != site (absentee owner)
|
|
# Building
|
|
"year_built": int,
|
|
"effective_year_built": int | None, # If county exposes renovations
|
|
"sqft_heated": int,
|
|
"sqft_total": int,
|
|
"lot_acres": float,
|
|
"lot_total_sqft": int,
|
|
"bedrooms": int,
|
|
"baths": float,
|
|
"stories": float,
|
|
"building_type": str,
|
|
"roof_type": str,
|
|
"exterior_wall": str,
|
|
"interior_wall": str,
|
|
"int_flooring": str,
|
|
# Values
|
|
"just_value_current": int,
|
|
"assessed_value_current": int,
|
|
"taxable_value_current": int,
|
|
"exemption_current": int,
|
|
"just_value_last": int,
|
|
"assessed_value_last": int,
|
|
"taxes_paid_last": float,
|
|
"tax_year_current": int,
|
|
"tax_year_last": int,
|
|
# Owner signals
|
|
"homestead_active": bool, # primary residence flag
|
|
"homestead_amount": int,
|
|
# Sales history (most recent first)
|
|
"sales_history": [
|
|
{
|
|
"date": "MM/DD/YYYY",
|
|
"price": int,
|
|
"deed_type": str, # "Warranty Deed", "Quit Claim", etc.
|
|
"qualified": str, # "Qualified" | "Unqualified"
|
|
"vacant_improved": str, # "Vacant" | "Improved"
|
|
"book_page": str,
|
|
}, ...
|
|
],
|
|
# Improvements / permits (when county exposes)
|
|
"improvements": [
|
|
{"year": int, "type": str, "permit": str | None}, ...
|
|
],
|
|
# Computed signals
|
|
"most_recent_qualified_sale": dict | None,
|
|
"renovation_signal": { # Heuristic flip/renov detection
|
|
"is_flip_pattern": bool,
|
|
"evidence": str,
|
|
"value_increase_pct": float,
|
|
"months_between": int,
|
|
},
|
|
# Land
|
|
"zoning": str,
|
|
"use_code": str,
|
|
"use_description": str,
|
|
# Metadata
|
|
"county": str,
|
|
"state": str,
|
|
"source": str, # "Duval PA (paopropertysearch.coj.net)" etc.
|
|
"source_url": str,
|
|
"fetched_at": ISO timestamp,
|
|
"errors": [str],
|
|
}
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import Optional
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
# County → adapter mapping
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
|
|
# Counties with FULL extractors (returning the rich unified schema).
# When another county (Orange, Hillsborough, etc.) gets an adapter, add it here
# and add the matching dispatch branch in fetch_pa_record.
_SUPPORTED_COUNTIES: dict[str, str] = {
    # county_lowercase: state
    "duval": "FL",
    "broward": "FL",
    "miami-dade": "FL",
    "palm beach": "FL",
}

# Alternate spellings accepted from callers → canonical key in
# _SUPPORTED_COUNTIES. Only spellings that fetch_pa_record can dispatch
# belong here; it already handles "miami dade" explicitly.
_COUNTY_ALIASES: dict[str, str] = {
    "miami dade": "miami-dade",
}


def is_pa_supported(county_name: Optional[str], state: Optional[str]) -> bool:
    """Return True when a full PA extractor exists for this county/state.

    Args:
        county_name: county name; matching ignores case, a " county"
            suffix, surrounding whitespace, and the aliases listed in
            _COUNTY_ALIASES.
        state: state code; matching ignores case and surrounding whitespace.

    Returns:
        False when either argument is empty/None or the pair is not listed
        in _SUPPORTED_COUNTIES.
    """
    if not county_name or not state:
        return False
    key = county_name.lower().replace(" county", "").strip()
    # Without the alias step "Miami Dade" was rejected here, which made the
    # "miami dade" branch in fetch_pa_record unreachable.
    key = _COUNTY_ALIASES.get(key, key)
    return _SUPPORTED_COUNTIES.get(key) == state.strip().upper()


def list_supported_counties() -> list[tuple[str, str]]:
    """Return [(county_lower, state_upper), ...] for counties with a full PA adapter."""
    return list(_SUPPORTED_COUNTIES.items())
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
# Main entry point
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
|
|
def fetch_pa_record(
    *,
    county_name: Optional[str],
    state: Optional[str] = "FL",
    address: Optional[str] = None,
    parcel_id: Optional[str] = None,
    zip_code: Optional[str] = None,
    timeout_seconds: int = 45,
    listing_price: Optional[float] = None,
) -> Optional[dict]:
    """Route a PA lookup to the adapter of the given county.

    Args:
        county_name: county name; case and a " county" suffix are ignored.
        state: state code, checked together with the county by
            is_pa_supported.
        address: forwarded to the Duval and Miami-Dade adapters.
        parcel_id: forwarded to every adapter.
        zip_code: forwarded to the Duval adapter only.
        timeout_seconds: forwarded to every adapter.
        listing_price: optional — enables flip-in-progress detection
            (recent qualified sale << listing → owner is flipping);
            forwarded to every adapter except Broward.

    Returns:
        The dict produced by the county adapter, or None if the county is
        not supported.
    """
    if not is_pa_supported(county_name, state):
        return None

    county_key = county_name.lower().replace(" county", "").strip()

    def _run_miami_dade() -> dict:
        return _fetch_miami_dade(
            parcel_id=parcel_id,
            address=address,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        )

    # Lazy dispatch table: only the selected adapter is ever invoked.
    dispatch = {
        "duval": lambda: _fetch_duval(
            address=address,
            parcel_id=parcel_id,
            zip_code=zip_code,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        ),
        "broward": lambda: _fetch_broward(
            parcel_id=parcel_id,
            timeout_seconds=timeout_seconds,
        ),
        "miami-dade": _run_miami_dade,
        "miami dade": _run_miami_dade,
        "palm beach": lambda: _fetch_palm_beach(
            parcel_id=parcel_id,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        ),
    }

    adapter = dispatch.get(county_key)
    if adapter is None:
        return None
    return adapter()
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
# Adapter wrappers (normalize per-county output to unified schema)
|
|
# ════════════════════════════════════════════════════════════════════════════
|
|
|
|
def _fetch_duval(
    *,
    address: Optional[str],
    parcel_id: Optional[str],
    zip_code: Optional[str],
    timeout_seconds: int,
    listing_price: Optional[float] = None,
) -> dict:
    """Duval adapter wrapper: pa_duval.fetch_duval_pa_record → unified schema.

    Args:
        address: forwarded unchanged to fetch_duval_pa_record.
        parcel_id: forwarded unchanged to fetch_duval_pa_record.
        zip_code: forwarded unchanged to fetch_duval_pa_record.
        timeout_seconds: forwarded unchanged to fetch_duval_pa_record.
        listing_price: forwarded unchanged to fetch_duval_pa_record.

    Returns:
        The unified-schema dict built from the raw Duval record (the raw
        dict is also passed through under "_raw"), or a short dict holding
        only county/state/errors when the pa_duval module cannot be imported.
    """
    try:
        from data_fetchers.pa_duval import fetch_duval_pa_record
    except ImportError as e:
        return {
            "county": "Duval",
            "state": "FL",
            "errors": [f"pa_duval module import failed: {e}"],
        }

    raw = fetch_duval_pa_record(
        address=address,
        parcel_id=parcel_id,
        zip_code=zip_code,
        timeout_seconds=timeout_seconds,
        listing_price=listing_price,
    )

    # Normalize raw → unified schema
    site_addr = " ".join(
        filter(None, [raw.get("site_address_line1"), raw.get("site_address_line2")])
    )
    land = raw.get("land") or {}

    # Accept int, float or numeric text ("3", "3.0"). Anything unparseable
    # (and a count of 0) becomes None instead of aborting the whole record;
    # the previous bare int() raised ValueError on "3.0" or "N/A".
    try:
        bedrooms = int(float(raw.get("bedrooms") or 0)) or None
    except (TypeError, ValueError, OverflowError):
        bedrooms = None

    return {
        # Identity
        "parcel_id": raw.get("parcel_id"),
        "owner_name": raw.get("owner_name"),
        "co_owners": [],  # Duval shows one owner; multi-owner detection pending
        "mailing_address": None,  # not in current detail extraction
        "site_address": site_addr.strip(),
        "owner_address_mismatch": None,
        # Building
        "year_built": raw.get("year_built"),
        "effective_year_built": None,  # Duval doesn't expose explicitly
        "sqft_heated": raw.get("sqft_heated"),
        "sqft_total": raw.get("sqft_gross"),
        "lot_acres": None,
        "lot_total_sqft": raw.get("lot_total_sqft"),
        "bedrooms": bedrooms,
        "baths": raw.get("baths"),
        "stories": raw.get("stories"),
        "building_type": raw.get("building_type"),
        "roof_type": raw.get("roof_struct"),
        "roofing_cover": raw.get("roofing_cover"),
        "exterior_wall": raw.get("exterior_wall"),
        "interior_wall": raw.get("interior_wall"),
        "int_flooring": raw.get("int_flooring"),
        # Values
        "just_value_current": raw.get("tax_current_year_just"),
        "assessed_value_current": raw.get("tax_current_year_assessed"),
        "taxable_value_current": raw.get("tax_current_year_taxable"),
        "exemption_current": raw.get("tax_current_year_exemptions"),
        "just_value_last": raw.get("tax_last_year_just"),
        "assessed_value_last": raw.get("tax_last_year_assessed"),
        "taxable_value_last": raw.get("tax_last_year_taxable"),
        "taxes_paid_last": None,  # Duval doesn't show direct tax amount here
        "tax_year_current": None,
        "tax_year_last": None,
        # Owner signals
        "homestead_active": raw.get("homestead_active"),
        "homestead_amount": raw.get("homestead_amount_current"),
        # Sales history
        "sales_history": raw.get("sales_history", []),
        "most_recent_qualified_sale": raw.get("most_recent_qualified_sale"),
        # Computed signals
        "renovation_signal": raw.get("renovation_signal"),
        # Improvements (Duval exposes no formal list; backlog for Acclaim integration)
        "improvements": [],
        # Land
        "zoning": land.get("zoning"),
        "use_code": raw.get("property_use") or "",
        "use_description": land.get("use_description"),
        "subdivision": raw.get("subdivision"),
        "legal_description": None,  # raw has gridLegal but not parsed to flat string here
        # Metadata
        "county": "Duval",
        "state": "FL",
        "source": raw.get("source"),
        "source_url": raw.get("source_url"),
        "fetched_at": raw.get("fetched_at"),
        "errors": raw.get("errors", []),
        # Raw passthrough for advanced consumers
        "_raw": raw,
    }
|
|
|
|
|
|
def _fetch_broward(*, parcel_id: Optional[str], timeout_seconds: int) -> dict:
    """Broward adapter wrapper: pa_broward.fetch_broward_pa_record → unified schema.

    Args:
        parcel_id: Broward folio; required because address search is not
            supported by this adapter.
        timeout_seconds: forwarded to fetch_broward_pa_record.

    Returns:
        The unified-schema dict built from the raw Broward record (the raw
        dict is also passed through under "_raw"), or a short dict holding
        only county/state/errors when parcel_id is missing or the
        pa_broward module cannot be imported.
    """
    if not parcel_id:
        return {
            "county": "Broward",
            "state": "FL",
            "errors": ["Broward PA needs parcel_id (folio); address search not yet supported"],
        }

    try:
        from data_fetchers.pa_broward import fetch_broward_pa_record
    except ImportError as e:
        return {
            "county": "Broward",
            "state": "FL",
            "errors": [f"pa_broward module import failed: {e}"],
        }

    raw = fetch_broward_pa_record(parcel_id, timeout_seconds=timeout_seconds)
    cy = raw.get("current_year") or {}
    ly = raw.get("last_year") or {}
    # `or {}` (not a .get default) so an explicit None under "county" is
    # tolerated the same way as for current_year / last_year.
    county_tax = (raw.get("tax_breakdown") or {}).get("county") or {}

    # Concatenate owner names if continuation
    owner_full = raw.get("owner_name") or ""
    if raw.get("owner_name_2"):
        owner_full = f"{owner_full} {raw['owner_name_2']}".strip()

    # Detect address mismatch by comparing the first token of each address.
    # Tokenizing first means a whitespace-only address yields no tokens and
    # counts as "unknown" instead of raising IndexError.
    mailing_tokens = (raw.get("mailing_address") or "").upper().split()
    site_tokens = (raw.get("situs_address") or "").upper().split()
    owner_addr_mismatch = bool(
        mailing_tokens and site_tokens and mailing_tokens[0] != site_tokens[0]
    )

    # Parse beds/baths from units_beds_baths ("units/beds/baths")
    beds = baths = None
    ubb = (raw.get("units_beds_baths") or "").split("/")
    if len(ubb) >= 3:
        beds_value = _parse_plain_number(ubb[1])
        # Beds must be a whole number; "3.0" is accepted as 3, "2.5" is not.
        if beds_value is not None and beds_value.is_integer():
            beds = int(beds_value)
        baths = _parse_plain_number(ubb[2])

    return {
        "parcel_id": raw.get("folio_number"),
        "owner_name": owner_full,
        "co_owners": [],
        "mailing_address": raw.get("mailing_address"),
        "site_address": raw.get("situs_address"),
        "owner_address_mismatch": owner_addr_mismatch,
        # Building
        "year_built": raw.get("year_built"),
        "effective_year_built": raw.get("effective_year"),
        "sqft_heated": raw.get("under_air_sqft"),
        "sqft_total": raw.get("adj_bldg_sqft"),
        "lot_acres": None,
        "lot_total_sqft": None,
        "bedrooms": beds,
        "baths": baths,
        "stories": None,
        "building_type": raw.get("use_code"),
        "roof_type": None,
        "roofing_cover": None,
        "exterior_wall": None,
        "interior_wall": None,
        "int_flooring": None,
        # Values
        "just_value_current": cy.get("just_value"),
        "assessed_value_current": cy.get("assessed_value"),
        "taxable_value_current": county_tax.get("taxable"),
        "exemption_current": county_tax.get("homestead", 0),
        "just_value_last": ly.get("just_value"),
        "assessed_value_last": ly.get("assessed_value"),
        "taxable_value_last": None,
        "taxes_paid_last": ly.get("taxes_paid"),
        "tax_year_current": cy.get("tax_year"),
        "tax_year_last": ly.get("tax_year"),
        # Owner signals
        "homestead_active": raw.get("homestead_active"),
        "homestead_amount": county_tax.get("homestead", 0),
        # Sales history
        "sales_history": raw.get("sales_history", []),
        "most_recent_qualified_sale": None,  # not separately calculated in pa_broward
        # Computed
        "renovation_signal": None,  # pa_broward doesn't compute this yet
        "improvements": [],
        # Land
        "zoning": None,
        "use_code": raw.get("use_code"),
        "use_description": raw.get("use_code"),
        "subdivision": raw.get("neighborhood"),
        "legal_description": raw.get("legal_description"),
        # Metadata
        "county": "Broward",
        "state": "FL",
        "source": "Broward Property Appraiser (bcpa.net)",
        "source_url": raw.get("source_url"),
        "fetched_at": raw.get("fetched_at"),
        "errors": raw.get("errors", []),
        "_raw": raw,
    }


def _parse_plain_number(text: str) -> Optional[float]:
    """Parse an unsigned decimal such as "3" or "2.5"; return None otherwise."""
    cleaned = text.strip()
    # Digits with at most one dot: rejects "", ".", "1.2.3", signs, exponents
    # and "nan"/"inf", so float() below cannot raise.
    if not cleaned.replace(".", "", 1).isdecimal():
        return None
    return float(cleaned)
|
|
|
|
|
|
def _fetch_palm_beach(
    *,
    parcel_id: Optional[str],
    timeout_seconds: int,
    listing_price: Optional[float] = None,
) -> dict:
    """Palm Beach wrapper: pa_palm_beach.fetch_palm_beach_pa_record → unified.

    Args:
        parcel_id: Palm Beach PCN; required because address search is not
            supported by this adapter.
        timeout_seconds: forwarded to fetch_palm_beach_pa_record.
        listing_price: forwarded to fetch_palm_beach_pa_record.

    Returns:
        The unified-schema dict built from the raw record (raw dict also
        exposed under "_raw"), or a short county/state/errors dict when
        parcel_id is missing or the pa_palm_beach module cannot be imported.
    """
    county_tag = {"county": "Palm Beach", "state": "FL"}

    if not parcel_id:
        return {
            **county_tag,
            "errors": ["Palm Beach PA needs parcel_id (PCN); address search not yet supported"],
        }
    try:
        from data_fetchers.pa_palm_beach import fetch_palm_beach_pa_record
    except ImportError as e:
        return {**county_tag, "errors": [f"pa_palm_beach module import failed: {e}"]}

    raw = fetch_palm_beach_pa_record(
        parcel_id=parcel_id,
        timeout_seconds=timeout_seconds,
        listing_price=listing_price,
    )

    # (unified key, raw key) pairs in output order. A raw key of None yields
    # None in the unified record; entries marked "fixed below" only reserve
    # their position and are overwritten after the table is applied.
    # Mailing address is not exposed separately by the flat parser, so both
    # mailing_address and owner_address_mismatch stay None.
    field_map = (
        # Identity
        ("parcel_id", "parcel_id"),
        ("owner_name", "owner_name"),
        ("co_owners", None),  # fixed below
        ("mailing_address", None),
        ("site_address", "site_address"),
        ("owner_address_mismatch", None),
        # Building
        ("year_built", "year_built"),
        ("effective_year_built", None),
        ("sqft_heated", "sqft_heated"),
        ("sqft_total", "sqft_total"),
        ("lot_acres", "lot_acres"),
        ("lot_total_sqft", None),
        ("bedrooms", "bedrooms"),
        ("baths", "baths"),
        ("stories", None),
        ("building_type", "use_code"),
        ("roof_type", "roof_struct"),
        ("roofing_cover", "roof_cover"),
        ("exterior_wall", None),
        ("interior_wall", "interior_wall"),
        ("int_flooring", None),
        # Values
        ("just_value_current", "just_value_current"),
        ("assessed_value_current", "assessed_value_current"),
        ("taxable_value_current", None),
        ("exemption_current", None),
        ("just_value_last", "just_value_last"),
        ("assessed_value_last", "assessed_value_last"),
        ("taxable_value_last", None),
        ("taxes_paid_last", None),
        ("tax_year_current", "tax_year_current"),
        ("tax_year_last", "tax_year_last"),
        # Owner signals
        ("homestead_active", "homestead_active"),
        ("homestead_amount", None),
        # Sales
        ("sales_history", None),  # fixed below
        ("most_recent_qualified_sale", "most_recent_qualified_sale"),
        ("renovation_signal", "renovation_signal"),
        ("improvements", None),  # fixed below
        # Land
        ("zoning", "zoning"),
        ("use_code", "use_code"),
        ("use_description", "use_code"),
        ("subdivision", "subdivision"),
        ("legal_description", "legal_description"),
        # Metadata
        ("county", None),  # fixed below
        ("state", None),  # fixed below
        ("source", "source"),
        ("source_url", "source_url"),
        ("fetched_at", "fetched_at"),
        ("errors", None),  # fixed below
        ("_raw", None),  # fixed below
    )
    record = {
        unified_key: (raw.get(raw_key) if raw_key is not None else None)
        for unified_key, raw_key in field_map
    }

    # Re-assigning an existing key keeps its position in the dict.
    record["co_owners"] = []
    record["sales_history"] = raw.get("sales_history", [])
    record["improvements"] = []
    record.update(county_tag)
    record["errors"] = raw.get("errors", [])
    record["_raw"] = raw
    return record
|
|
|
|
|
|
def _fetch_miami_dade(
    *,
    parcel_id: Optional[str],
    address: Optional[str],
    timeout_seconds: int,
    listing_price: Optional[float] = None,
) -> dict:
    """Miami-Dade adapter wrapper: pa_miami_dade.fetch_miami_dade_pa_record → unified schema.

    Args:
        parcel_id: forwarded to fetch_miami_dade_pa_record.
        address: forwarded to fetch_miami_dade_pa_record.
        timeout_seconds: forwarded to fetch_miami_dade_pa_record.
        listing_price: forwarded to fetch_miami_dade_pa_record.

    Returns:
        The unified-schema dict built from the raw record (raw dict also
        exposed under "_raw"), or a short county/state/errors dict when the
        pa_miami_dade module cannot be imported.
    """
    try:
        from data_fetchers.pa_miami_dade import fetch_miami_dade_pa_record
    except ImportError as e:
        failure = {"county": "Miami-Dade", "state": "FL"}
        failure["errors"] = [f"pa_miami_dade module import failed: {e}"]
        return failure

    raw = fetch_miami_dade_pa_record(
        parcel_id=parcel_id,
        address=address,
        timeout_seconds=timeout_seconds,
        listing_price=listing_price,
    )
    field = raw.get

    # The unified record is assembled section by section; the sections share
    # no keys, so merging them in this order fixes the final key order.
    identity = {
        "parcel_id": field("parcel_id"),
        "owner_name": field("owner_name"),
        "co_owners": field("co_owners", []),
        "mailing_address": field("mailing_address"),
        "site_address": field("site_address"),
        # Mismatch between mailing and site address is not computed here.
        "owner_address_mismatch": None,
    }

    building = {
        "year_built": field("year_built"),
        "effective_year_built": None,  # not read from the raw record
        "sqft_heated": field("sqft_heated"),
        "sqft_total": field("sqft_total"),
        "lot_acres": None,
        "lot_total_sqft": field("lot_total_sqft"),
        "bedrooms": field("bedrooms"),
        "baths": field("baths"),
        "stories": field("floors"),
        "building_type": field("use_code"),
    }
    # Construction details are not read from the raw record: all None.
    building.update(
        dict.fromkeys(
            ("roof_type", "roofing_cover", "exterior_wall", "interior_wall", "int_flooring")
        )
    )

    values = {
        "just_value_current": field("just_value_current"),
        "assessed_value_current": field("assessed_value_current"),
        "taxable_value_current": None,  # in pa-taxablevalueinformation, not parsed yet
        "exemption_current": None,
        "just_value_last": field("just_value_last"),
        "assessed_value_last": field("assessed_value_last"),
        "taxable_value_last": None,
        "taxes_paid_last": None,
        "tax_year_current": field("tax_year_current"),
        "tax_year_last": field("tax_year_last"),
    }

    owner_and_sales = {
        "homestead_active": field("homestead_active"),
        "homestead_amount": None,
        "sales_history": field("sales_history", []),
        "most_recent_qualified_sale": field("most_recent_qualified_sale"),
        "renovation_signal": field("renovation_signal"),
        "improvements": [],
    }

    land = {
        "zoning": field("pa_primary_zone"),
        "use_code": field("use_code"),
        "use_description": field("use_description"),
        "subdivision": field("subdivision"),
        "legal_description": field("legal_description"),
    }

    metadata = {
        "county": "Miami-Dade",
        "state": "FL",
        "source": field("source"),
        "source_url": field("source_url"),
        "fetched_at": field("fetched_at"),
        "errors": field("errors", []),
        "_raw": raw,
    }

    return {**identity, **building, **values, **owner_and_sales, **land, **metadata}
|