feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+545
View File
@@ -0,0 +1,545 @@
"""data_fetchers/property_appraiser.py — Unified PA router.
Source of truth for ANY property in the USA. Pre-screening calls this module's
fetch_pa_record() as its FIRST step to avoid wrong inferences drawn from listing
data (Zillow can be wrong, stale, or incomplete).
USAGE:
from data_fetchers.property_appraiser import fetch_pa_record, is_pa_supported
if is_pa_supported(county_name, state):
record = fetch_pa_record(
address="2352 SCENIC VIEW CT",
parcel_id=None,
county_name="Duval",
state="FL",
zip_code="32218",
)
UNIFIED RETURN SCHEMA (each adapter fills in what it can; missing fields = None):
{
# Identity
"parcel_id": str, # County-specific RE#/folio
"owner_name": str, # Primary owner
"co_owners": [str], # If multiple
"mailing_address": str,
"site_address": str, # Property address
"owner_address_mismatch": bool, # mailing != site (absentee owner)
# Building
"year_built": int,
"effective_year_built": int | None, # If county exposes renovations
"sqft_heated": int,
"sqft_total": int,
"lot_acres": float,
"lot_total_sqft": int,
"bedrooms": int,
"baths": float,
"stories": float,
"building_type": str,
"roof_type": str,
"exterior_wall": str,
"interior_wall": str,
"int_flooring": str,
# Values
"just_value_current": int,
"assessed_value_current": int,
"taxable_value_current": int,
"exemption_current": int,
"just_value_last": int,
"assessed_value_last": int,
"taxes_paid_last": float,
"tax_year_current": int,
"tax_year_last": int,
# Owner signals
"homestead_active": bool, # primary residence flag
"homestead_amount": int,
# Sales history (most recent first)
"sales_history": [
{
"date": "MM/DD/YYYY",
"price": int,
"deed_type": str, # "Warranty Deed", "Quit Claim", etc.
"qualified": str, # "Qualified" | "Unqualified"
"vacant_improved": str, # "Vacant" | "Improved"
"book_page": str,
}, ...
],
# Improvements / permits (when county exposes)
"improvements": [
{"year": int, "type": str, "permit": str | None}, ...
],
# Computed signals
"most_recent_qualified_sale": dict | None,
"renovation_signal": { # Heuristic flip/renov detection
"is_flip_pattern": bool,
"evidence": str,
"value_increase_pct": float,
"months_between": int,
},
# Land
"zoning": str,
"use_code": str,
"use_description": str,
# Metadata
"county": str,
"state": str,
"source": str, # "Duval PA (paopropertysearch.coj.net)" etc.
"source_url": str,
"fetched_at": ISO timestamp,
"errors": [str],
}
"""
from __future__ import annotations
from typing import Optional
# ════════════════════════════════════════════════════════════════════════════
# County → adapter mapping
# ════════════════════════════════════════════════════════════════════════════
# Counties with FULL extractors (returning rich unified schema).
# When more counties (Orange, Hillsborough, etc.) get full extractors, add them here.
_SUPPORTED_COUNTIES: dict[str, str] = {
    # county_lowercase: state
    "duval": "FL",
    "broward": "FL",
    "miami-dade": "FL",
    "palm beach": "FL",
}

# Alternate spellings mapped to their canonical key in _SUPPORTED_COUNTIES.
_COUNTY_ALIASES: dict[str, str] = {
    "miami dade": "miami-dade",
}


def is_pa_supported(county_name: Optional[str], state: Optional[str]) -> bool:
    """Return True when a full PA extractor exists for this county/state pair.

    Args:
        county_name: County name in any letter case, with or without a
            " County" suffix (e.g. "Duval", "duval county"). "Miami Dade" is
            accepted as an alternate spelling of "Miami-Dade".
        state: Two-letter state code in any letter case; surrounding
            whitespace is ignored.

    Returns:
        False when either argument is missing or empty, or when the county
        is not registered for that state; True otherwise.
    """
    if not county_name or not state:
        return False
    key = county_name.lower().replace(" county", "").strip()
    # Without the alias lookup the space-separated spelling is rejected here.
    key = _COUNTY_ALIASES.get(key, key)
    return _SUPPORTED_COUNTIES.get(key) == state.strip().upper()
def list_supported_counties() -> list[tuple[str, str]]:
    """List every county that has a full PA extractor.

    Returns:
        ``(county_lowercase, state_uppercase)`` pairs, in registry order.
    """
    return list(_SUPPORTED_COUNTIES.items())
# ════════════════════════════════════════════════════════════════════════════
# Main entry point
# ════════════════════════════════════════════════════════════════════════════
def fetch_pa_record(
    *,
    county_name: Optional[str],
    state: Optional[str] = "FL",
    address: Optional[str] = None,
    parcel_id: Optional[str] = None,
    zip_code: Optional[str] = None,
    timeout_seconds: int = 45,
    listing_price: Optional[float] = None,
) -> Optional[dict]:
    """Fetch the PA record for a property via its county-specific adapter.

    Args:
        county_name: County name in any letter case, with or without a
            " County" suffix. "Miami Dade" is accepted as a spelling of
            "Miami-Dade".
        state: Two-letter state code, checked together with the county.
        address: Street address; forwarded to the Duval and Miami-Dade
            adapters.
        parcel_id: County parcel identifier; forwarded to every adapter.
        zip_code: ZIP code; forwarded to the Duval adapter only.
        timeout_seconds: Timeout forwarded to the adapter.
        listing_price: Optional — enables flip-in-progress detection
            (recent qualified sale << listing → owner is flipping). Not
            forwarded to the Broward adapter.

    Returns:
        Unified dict with PA data, or None if the county is not supported.
    """
    if not county_name:
        return None

    key = county_name.lower().replace(" county", "").strip()
    # Canonicalize the space-separated spelling before the support check so
    # that "Miami Dade" is not rejected up front.
    if key == "miami dade":
        key = "miami-dade"

    if not is_pa_supported(key, state):
        return None

    if key == "duval":
        return _fetch_duval(
            address=address,
            parcel_id=parcel_id,
            zip_code=zip_code,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        )
    if key == "broward":
        return _fetch_broward(parcel_id=parcel_id, timeout_seconds=timeout_seconds)
    if key == "miami-dade":
        return _fetch_miami_dade(
            parcel_id=parcel_id,
            address=address,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        )
    if key == "palm beach":
        return _fetch_palm_beach(
            parcel_id=parcel_id,
            timeout_seconds=timeout_seconds,
            listing_price=listing_price,
        )
    # Registered as supported but no adapter is wired up for it here.
    return None
# ════════════════════════════════════════════════════════════════════════════
# Adapter wrappers (normalize per-county output to unified schema)
# ════════════════════════════════════════════════════════════════════════════
def _fetch_duval(
*,
address: Optional[str],
parcel_id: Optional[str],
zip_code: Optional[str],
timeout_seconds: int,
listing_price: Optional[float] = None,
) -> dict:
"""Duval adapter wrapper: pa_duval.fetch_duval_pa_record → unified schema."""
try:
from data_fetchers.pa_duval import fetch_duval_pa_record
except ImportError as e:
return {
"county": "Duval",
"state": "FL",
"errors": [f"pa_duval module import failed: {e}"],
}
raw = fetch_duval_pa_record(
address=address,
parcel_id=parcel_id,
zip_code=zip_code,
timeout_seconds=timeout_seconds,
listing_price=listing_price,
)
# Normalize raw → unified schema
site_addr = " ".join(filter(None, [raw.get("site_address_line1"), raw.get("site_address_line2")]))
return {
# Identity
"parcel_id": raw.get("parcel_id"),
"owner_name": raw.get("owner_name"),
"co_owners": [], # Duval shows one owner; multi-owner detection pending
"mailing_address": None, # not in current detail extraction
"site_address": site_addr.strip(),
"owner_address_mismatch": None,
# Building
"year_built": raw.get("year_built"),
"effective_year_built": None, # Duval doesn't expose explicitly
"sqft_heated": raw.get("sqft_heated"),
"sqft_total": raw.get("sqft_gross"),
"lot_acres": None,
"lot_total_sqft": raw.get("lot_total_sqft"),
"bedrooms": int(raw.get("bedrooms") or 0) or None,
"baths": raw.get("baths"),
"stories": raw.get("stories"),
"building_type": raw.get("building_type"),
"roof_type": raw.get("roof_struct"),
"roofing_cover": raw.get("roofing_cover"),
"exterior_wall": raw.get("exterior_wall"),
"interior_wall": raw.get("interior_wall"),
"int_flooring": raw.get("int_flooring"),
# Values
"just_value_current": raw.get("tax_current_year_just"),
"assessed_value_current": raw.get("tax_current_year_assessed"),
"taxable_value_current": raw.get("tax_current_year_taxable"),
"exemption_current": raw.get("tax_current_year_exemptions"),
"just_value_last": raw.get("tax_last_year_just"),
"assessed_value_last": raw.get("tax_last_year_assessed"),
"taxable_value_last": raw.get("tax_last_year_taxable"),
"taxes_paid_last": None, # Duval doesn't show direct tax amount here
"tax_year_current": None,
"tax_year_last": None,
# Owner signals
"homestead_active": raw.get("homestead_active"),
"homestead_amount": raw.get("homestead_amount_current"),
# Sales history
"sales_history": raw.get("sales_history", []),
"most_recent_qualified_sale": raw.get("most_recent_qualified_sale"),
# Computed signals
"renovation_signal": raw.get("renovation_signal"),
# Improvements (Duval no expone formales; backlog para Acclaim integration)
"improvements": [],
# Land
"zoning": (raw.get("land") or {}).get("zoning"),
"use_code": raw.get("property_use") or "",
"use_description": (raw.get("land") or {}).get("use_description"),
"subdivision": raw.get("subdivision"),
"legal_description": None, # raw has gridLegal but not parsed to flat string here
# Metadata
"county": "Duval",
"state": "FL",
"source": raw.get("source"),
"source_url": raw.get("source_url"),
"fetched_at": raw.get("fetched_at"),
"errors": raw.get("errors", []),
# Raw passthrough for advanced consumers
"_raw": raw,
}
def _fetch_broward(*, parcel_id: Optional[str], timeout_seconds: int) -> dict:
"""Broward adapter wrapper: pa_broward.fetch_broward_pa_record → unified schema."""
if not parcel_id:
return {
"county": "Broward",
"state": "FL",
"errors": ["Broward PA needs parcel_id (folio); address search not yet supported"],
}
try:
from data_fetchers.pa_broward import fetch_broward_pa_record
except ImportError as e:
return {
"county": "Broward",
"state": "FL",
"errors": [f"pa_broward module import failed: {e}"],
}
raw = fetch_broward_pa_record(parcel_id, timeout_seconds=timeout_seconds)
cy = raw.get("current_year") or {}
ly = raw.get("last_year") or {}
# Concatenate owner names if continuation
owner_full = (raw.get("owner_name") or "")
if raw.get("owner_name_2"):
owner_full = f"{owner_full} {raw['owner_name_2']}".strip()
# Detect address mismatch
mailing = (raw.get("mailing_address") or "").upper()
site = (raw.get("situs_address") or "").upper()
owner_addr_mismatch = bool(mailing and site and mailing.split()[0] != site.split()[0])
# Parse beds/baths from units_beds_baths
beds = baths = None
ubb = (raw.get("units_beds_baths") or "").split("/")
if len(ubb) >= 3:
for raw_v, key in [(ubb[1], "beds"), (ubb[2], "baths")]:
v = raw_v.strip()
try:
if key == "beds":
beds = int(v) if v.replace(".", "").isdigit() else None
else:
baths = float(v) if v.replace(".", "").isdigit() else None
except (ValueError, IndexError):
pass
return {
"parcel_id": raw.get("folio_number"),
"owner_name": owner_full,
"co_owners": [],
"mailing_address": raw.get("mailing_address"),
"site_address": raw.get("situs_address"),
"owner_address_mismatch": owner_addr_mismatch,
# Building
"year_built": raw.get("year_built"),
"effective_year_built": raw.get("effective_year"),
"sqft_heated": raw.get("under_air_sqft"),
"sqft_total": raw.get("adj_bldg_sqft"),
"lot_acres": None,
"lot_total_sqft": None,
"bedrooms": beds,
"baths": baths,
"stories": None,
"building_type": raw.get("use_code"),
"roof_type": None,
"roofing_cover": None,
"exterior_wall": None,
"interior_wall": None,
"int_flooring": None,
# Values
"just_value_current": cy.get("just_value"),
"assessed_value_current": cy.get("assessed_value"),
"taxable_value_current": (raw.get("tax_breakdown") or {}).get("county", {}).get("taxable"),
"exemption_current": (raw.get("tax_breakdown") or {}).get("county", {}).get("homestead", 0),
"just_value_last": ly.get("just_value"),
"assessed_value_last": ly.get("assessed_value"),
"taxable_value_last": None,
"taxes_paid_last": ly.get("taxes_paid"),
"tax_year_current": cy.get("tax_year"),
"tax_year_last": ly.get("tax_year"),
# Owner signals
"homestead_active": raw.get("homestead_active"),
"homestead_amount": (raw.get("tax_breakdown") or {}).get("county", {}).get("homestead", 0),
# Sales history
"sales_history": raw.get("sales_history", []),
"most_recent_qualified_sale": None, # not separately calculated in pa_broward
# Computed
"renovation_signal": None, # pa_broward doesn't compute this yet
"improvements": [],
# Land
"zoning": None,
"use_code": raw.get("use_code"),
"use_description": raw.get("use_code"),
"subdivision": raw.get("neighborhood"),
"legal_description": raw.get("legal_description"),
# Metadata
"county": "Broward",
"state": "FL",
"source": "Broward Property Appraiser (bcpa.net)",
"source_url": raw.get("source_url"),
"fetched_at": raw.get("fetched_at"),
"errors": raw.get("errors", []),
"_raw": raw,
}
def _fetch_palm_beach(
*,
parcel_id: Optional[str],
timeout_seconds: int,
listing_price: Optional[float] = None,
) -> dict:
"""Palm Beach wrapper: pa_palm_beach.fetch_palm_beach_pa_record → unified."""
if not parcel_id:
return {
"county": "Palm Beach",
"state": "FL",
"errors": ["Palm Beach PA needs parcel_id (PCN); address search not yet supported"],
}
try:
from data_fetchers.pa_palm_beach import fetch_palm_beach_pa_record
except ImportError as e:
return {
"county": "Palm Beach",
"state": "FL",
"errors": [f"pa_palm_beach module import failed: {e}"],
}
raw = fetch_palm_beach_pa_record(
parcel_id=parcel_id,
timeout_seconds=timeout_seconds,
listing_price=listing_price,
)
# Mailing/site address mismatch heuristic: PB doesn't expose mailing
# separately in flat parser; site_address may include "Municipality" noise.
return {
"parcel_id": raw.get("parcel_id"),
"owner_name": raw.get("owner_name"),
"co_owners": [],
"mailing_address": None,
"site_address": raw.get("site_address"),
"owner_address_mismatch": None,
# Building
"year_built": raw.get("year_built"),
"effective_year_built": None,
"sqft_heated": raw.get("sqft_heated"),
"sqft_total": raw.get("sqft_total"),
"lot_acres": raw.get("lot_acres"),
"lot_total_sqft": None,
"bedrooms": raw.get("bedrooms"),
"baths": raw.get("baths"),
"stories": None,
"building_type": raw.get("use_code"),
"roof_type": raw.get("roof_struct"),
"roofing_cover": raw.get("roof_cover"),
"exterior_wall": None,
"interior_wall": raw.get("interior_wall"),
"int_flooring": None,
# Values
"just_value_current": raw.get("just_value_current"),
"assessed_value_current": raw.get("assessed_value_current"),
"taxable_value_current": None,
"exemption_current": None,
"just_value_last": raw.get("just_value_last"),
"assessed_value_last": raw.get("assessed_value_last"),
"taxable_value_last": None,
"taxes_paid_last": None,
"tax_year_current": raw.get("tax_year_current"),
"tax_year_last": raw.get("tax_year_last"),
# Owner signals
"homestead_active": raw.get("homestead_active"),
"homestead_amount": None,
# Sales
"sales_history": raw.get("sales_history", []),
"most_recent_qualified_sale": raw.get("most_recent_qualified_sale"),
"renovation_signal": raw.get("renovation_signal"),
"improvements": [],
# Land
"zoning": raw.get("zoning"),
"use_code": raw.get("use_code"),
"use_description": raw.get("use_code"),
"subdivision": raw.get("subdivision"),
"legal_description": raw.get("legal_description"),
# Metadata
"county": "Palm Beach",
"state": "FL",
"source": raw.get("source"),
"source_url": raw.get("source_url"),
"fetched_at": raw.get("fetched_at"),
"errors": raw.get("errors", []),
"_raw": raw,
}
def _fetch_miami_dade(
*,
parcel_id: Optional[str],
address: Optional[str],
timeout_seconds: int,
listing_price: Optional[float] = None,
) -> dict:
"""Miami-Dade adapter wrapper: pa_miami_dade.fetch_miami_dade_pa_record → unified schema."""
try:
from data_fetchers.pa_miami_dade import fetch_miami_dade_pa_record
except ImportError as e:
return {
"county": "Miami-Dade",
"state": "FL",
"errors": [f"pa_miami_dade module import failed: {e}"],
}
raw = fetch_miami_dade_pa_record(
parcel_id=parcel_id,
address=address,
timeout_seconds=timeout_seconds,
listing_price=listing_price,
)
# Normalize to unified schema
return {
# Identity
"parcel_id": raw.get("parcel_id"),
"owner_name": raw.get("owner_name"),
"co_owners": raw.get("co_owners", []),
"mailing_address": raw.get("mailing_address"),
"site_address": raw.get("site_address"),
# Owner address mismatch — primitive heuristic (Miami-Dade doesn't expose
# separate site/mailing comparison cleanly; can compute later if needed)
"owner_address_mismatch": None,
# Building
"year_built": raw.get("year_built"),
"effective_year_built": None, # Miami-Dade doesn't expose explicitly
"sqft_heated": raw.get("sqft_heated"),
"sqft_total": raw.get("sqft_total"),
"lot_acres": None,
"lot_total_sqft": raw.get("lot_total_sqft"),
"bedrooms": raw.get("bedrooms"),
"baths": raw.get("baths"),
"stories": raw.get("floors"),
"building_type": raw.get("use_code"),
"roof_type": None,
"roofing_cover": None,
"exterior_wall": None,
"interior_wall": None,
"int_flooring": None,
# Values
"just_value_current": raw.get("just_value_current"),
"assessed_value_current": raw.get("assessed_value_current"),
"taxable_value_current": None, # in pa-taxablevalueinformation, not parsed yet
"exemption_current": None,
"just_value_last": raw.get("just_value_last"),
"assessed_value_last": raw.get("assessed_value_last"),
"taxable_value_last": None,
"taxes_paid_last": None,
"tax_year_current": raw.get("tax_year_current"),
"tax_year_last": raw.get("tax_year_last"),
# Owner signals
"homestead_active": raw.get("homestead_active"),
"homestead_amount": None,
# Sales history
"sales_history": raw.get("sales_history", []),
"most_recent_qualified_sale": raw.get("most_recent_qualified_sale"),
# Renovation
"renovation_signal": raw.get("renovation_signal"),
"improvements": [],
# Land
"zoning": raw.get("pa_primary_zone"),
"use_code": raw.get("use_code"),
"use_description": raw.get("use_description"),
"subdivision": raw.get("subdivision"),
"legal_description": raw.get("legal_description"),
# Metadata
"county": "Miami-Dade",
"state": "FL",
"source": raw.get("source"),
"source_url": raw.get("source_url"),
"fetched_at": raw.get("fetched_at"),
"errors": raw.get("errors", []),
"_raw": raw,
}