Files
AR-House/data_fetchers/pa_duval.py
T
2026-07-03 12:24:58 -04:00

821 lines
38 KiB
Python

"""data_fetchers/pa_duval.py — Full Duval County Property Appraiser extractor.
Sitio: https://paopropertysearch.coj.net (ASP.NET WebForms)
Flow: Search.aspx → Results.aspx → Detail.aspx?ParcelNumber=XXX
Extrae todo lo publico del Duval PA para construir un Property Snapshot Report:
- Owner name(s)
- Property address + subdivision + legal description
- Building: type, year_built, sqft heated/total, bedrooms, bathrooms,
exterior wall, roof type, interior flooring
- Values: just/market, assessed, exemptions (3-year history)
- Tax breakdown por taxing district
- Sales history completa (book/page, date, price, deed type, qualified status)
- Homestead exemption (key signal: owner-occupant vs investor)
- Land details (zoning, lot size, use code)
- Extra features (fireplace, pool, etc.)
USAGE:
from data_fetchers.pa_duval import fetch_duval_pa_record
rec = fetch_duval_pa_record(address="2352 SCENIC VIEW CT", zip_code="32218")
# rec["year_built"], rec["sales_history"], rec["homestead_active"]...
TECHNICAL:
- ASP.NET WebForms con WebForm_DoPostBackWithOptions (compat IE8)
- Element IDs ESTABLES (no autogenerados)
- Per-search latency: ~10-15s (entry → search → results → detail)
- Free (Playwright local, no API cost)
"""
from __future__ import annotations
import re
import time
from datetime import datetime, timezone
from typing import Optional
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
_BASE_URL = "https://paopropertysearch.coj.net"
_SEARCH_URL = f"{_BASE_URL}/Basic/Search.aspx"
# Address parser para Duval ASP.NET form fields.
# Acepta long form (COURT, STREET) y abbreviation (CT, ST). Strip city/state/zip
# antes de parsear (split por primera coma).
_ADDRESS_RE = re.compile(
r"^\s*(?P<num>\d+)\s+"
r"(?:(?P<prefix>N|S|E|W|NE|NW|SE|SW)\s+)?"
r"(?P<name>[A-Z][A-Z\s\d\-']*?)"
r"\s+(?P<suffix>"
r"ST|STREET|AVE|AV|AVENUE|RD|ROAD|DR|DRIVE|CT|COURT|CIR|CIRCLE|"
r"LN|LANE|BLVD|BOULEVARD|HWY|HIGHWAY|WAY|PL|PLACE|PKY|PKWY|PARKWAY|"
r"TRL|TRAIL|TER|TERRACE|LOOP|RUN|ALY|ALLEY|XING|CROSSING"
r")\b",
re.IGNORECASE,
)
# Map long form → ASP.NET ddStreetSuffix value
_SUFFIX_NORMALIZE = {
"STREET": "ST", "AVENUE": "AVE", "AV": "AVE", "ROAD": "RD",
"DRIVE": "DR", "COURT": "CT", "CIRCLE": "CIR", "LANE": "LN",
"BOULEVARD": "BLVD", "HIGHWAY": "HWY", "PLACE": "PL",
"PARKWAY": "PKWY", "PKY": "PKWY", "TRAIL": "TRL", "TERRACE": "TER",
"ALLEY": "ALY", "CROSSING": "XING",
}
# ════════════════════════════════════════════════════════════════════════════
# Field ID mapping (confirmed via probe on 2352 SCENIC VIEW CT)
# ════════════════════════════════════════════════════════════════════════════
_SCALAR_IDS = {
"owner_name": "ctl00_cphBody_repeaterOwnerInformation_ctl00_lblOwnerName",
"site_address_line1": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingSiteAddressLine1",
"site_address_line2": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingSiteAddressLine2",
"building_type": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingType",
"year_built": "ctl00_cphBody_repeaterBuilding_ctl00_lblYearBuilt",
"building_value": "ctl00_cphBody_repeaterBuilding_ctl00_lblBldgValue",
# Tax values current + last year
"tax_last_year_just": "ctl00_cphBody_lblTaxLastYearJustValue",
"tax_last_year_assessed": "ctl00_cphBody_lblTaxLastYearAssessedValue",
"tax_last_year_exemptions": "ctl00_cphBody_lblTaxLastYearExemptions",
"tax_last_year_taxable": "ctl00_cphBody_lblTaxLastYearTaxableValue",
"tax_current_year_just": "ctl00_cphBody_lblTaxCurrentYearJustValue",
"tax_current_year_assessed": "ctl00_cphBody_lblTaxCurrentYearAssessedValue",
"tax_current_year_exemptions": "ctl00_cphBody_lblTaxCurrentYearExemptions",
"tax_current_year_taxable": "ctl00_cphBody_lblTaxCurrentYearTaxableValue",
# Values from main values table (no current "in progress" year)
"assessed_value_3": "ctl00_cphBody_lblAssessedValue3",
"taxable_value_school": "ctl00_cphBody_lblTaxableValueSchool",
}
def _parse_address(address: str) -> Optional[dict]:
"""Parse address into Duval form fields.
Acepta:
'2352 SCENIC VIEW CT' → simple
'2352 SCENIC VIEW Court, Jacksonville, FL 32218' → con city/state/zip
'123 N MAIN ST, Anytown, FL' → con prefix
"""
if not address:
return None
# Strip city/state/zip — toma solo lo antes del primer comma
street_only = address.split(",")[0].strip().upper()
m = _ADDRESS_RE.search(street_only)
if not m:
return None
suffix_raw = (m.group("suffix") or "").strip().upper()
suffix_normalized = _SUFFIX_NORMALIZE.get(suffix_raw, suffix_raw) if suffix_raw else None
return {
"street_num": m.group("num"),
"prefix": (m.group("prefix") or "").strip().upper() or None,
"name": m.group("name").strip(),
"suffix": suffix_normalized,
}
# ════════════════════════════════════════════════════════════════════════════
# Public API
# ════════════════════════════════════════════════════════════════════════════
def fetch_duval_pa_record(
address: Optional[str] = None,
parcel_id: Optional[str] = None,
zip_code: Optional[str] = None,
timeout_seconds: int = 30,
listing_price: Optional[float] = None,
) -> dict:
"""Fetch full Duval PA record by address OR parcel_id (RE#).
Args:
address: street address (e.g. "2352 SCENIC VIEW CT")
parcel_id: Duval RE# (e.g. "044273-0370") — preferred si lo tenes
zip_code: optional zip filter
timeout_seconds: max wait per Playwright op
Returns:
Dict con TODOS los campos publicos. Si fallo, dict tiene 'errors'.
Key fields:
- owner_name, owner_full_address (mailing)
- site_address, parcel_id (RE#), subdivision
- year_built (ENTERO), building_type, sqft_heated, sqft_gross,
sqft_garage, bedrooms, baths, stories
- exterior_wall, roof_struct, roofing_cover, interior_wall, int_flooring
- just_value_current, assessed_value_current, taxable_current,
exemption_current
- just_value_last, assessed_value_last, taxes_billed_last
- homestead_active (bool — exemptions >= $25,000 = homestead)
- sales_history: [{date, price, deed_type, qualified, book_page,
vacant_improved}, ...]
- extra_features: [{code, description, value}, ...]
- land: {zoning, use_code, lot_acres, lot_total_sqft}
- legal_description: str
- tax_breakdown: [{district, assessed, exempt, taxable, tax_amt}, ...]
- source_url: str (detail page URL)
- fetched_at: ISO timestamp
"""
fetched_at = datetime.now(timezone.utc).isoformat()
result = {
"county": "Duval",
"source": "Duval Property Appraiser (paopropertysearch.coj.net)",
"fetched_at": fetched_at,
"errors": [],
}
if not address and not parcel_id:
result["errors"].append("no address or parcel_id provided")
return result
try:
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
except ImportError:
result["errors"].append("playwright not installed")
return result
parsed_addr = _parse_address(address) if address else None
if address and not parsed_addr:
result["errors"].append(f"could not parse address '{address}' (need format: 'NUM [PREFIX] NAME SUFFIX')")
return result
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
ctx = browser.new_context(user_agent=USER_AGENT)
page = ctx.new_page()
page.set_default_timeout(timeout_seconds * 1000)
page.goto(_SEARCH_URL, wait_until="load", timeout=timeout_seconds * 1000)
time.sleep(2)
if parcel_id:
pid_clean = parcel_id.replace("-", "").strip()
detail_url = f"{_BASE_URL}/Basic/Detail.aspx?RE={pid_clean}"
try:
page.goto(detail_url, wait_until="load", timeout=timeout_seconds * 1000)
except Exception:
# If 'load' times out, fall back to 'commit' (page has navigated)
page.goto(detail_url, wait_until="commit", timeout=timeout_seconds * 1000)
time.sleep(5)
else:
# Search by address fields
page.locator("#ctl00_cphBody_tbStreetNumber").fill(parsed_addr["street_num"])
if parsed_addr["prefix"]:
try:
page.locator("#ctl00_cphBody_ddStreetPrefix").select_option(value=parsed_addr["prefix"])
except Exception:
pass
page.locator("#ctl00_cphBody_tbStreetName").fill(parsed_addr["name"])
if parsed_addr["suffix"]:
try:
page.locator("#ctl00_cphBody_ddStreetSuffix").select_option(value=parsed_addr["suffix"])
except Exception:
pass
if zip_code:
try:
page.locator("#ctl00_cphBody_tbZipCode").fill(zip_code)
except Exception:
pass
page.locator("#ctl00_cphBody_bSearch").click()
page.wait_for_timeout(4000)
# If results table → extract href from first row link and
# navigate directly (Playwright click + navigation hangs on ASP.NET)
rs_url = page.url
if "Results.aspx" in rs_url:
detail_href = _extract_detail_href_with_retry(page, max_retries=2)
if not detail_href:
body_preview = page.inner_text("body")[:300].replace("\n", " ")
result["errors"].append(
f"results page returned but no Detail.aspx link found "
f"(url={page.url}, body_preview={body_preview!r})"
)
browser.close()
return result
# Build absolute URL and navigate directly (no click)
if detail_href.startswith("/"):
detail_url = f"{_BASE_URL}{detail_href}"
elif detail_href.startswith("http"):
detail_url = detail_href
else:
detail_url = f"{_BASE_URL}/Basic/{detail_href}"
try:
page.goto(detail_url, wait_until="load", timeout=timeout_seconds * 1000)
except Exception:
page.goto(detail_url, wait_until="commit", timeout=timeout_seconds * 1000)
time.sleep(5)
elif "Detail.aspx" not in page.url:
result["errors"].append(f"unexpected URL after search: {page.url}")
browser.close()
return result
# We should now be on Detail.aspx
if "Detail.aspx" not in page.url:
result["errors"].append(f"failed to reach Detail page, URL: {page.url}")
browser.close()
return result
result["source_url"] = page.url
# Wait for KEY element to confirm full render before extracting.
# Retry once on server error (Duval intermittent 500s).
render_ok = False
for attempt in range(3):
try:
page.wait_for_selector(
"#ctl00_cphBody_repeaterBuilding_ctl00_lblYearBuilt",
state="attached",
timeout=20000,
)
render_ok = True
break
except Exception:
# Try owner sentinel
try:
page.wait_for_selector(
"#ctl00_cphBody_repeaterOwnerInformation_ctl00_lblOwnerName",
state="attached",
timeout=10000,
)
render_ok = True
break
except Exception:
# Check if server error → retry
body = page.inner_text("body")[:500]
if "Server Error" in body or "Runtime Error" in body:
if attempt < 2:
time.sleep(8 * (attempt + 1))
try:
page.reload(wait_until="load", timeout=30000)
except Exception:
pass
continue
break
if not render_ok:
result["errors"].append("detail page didn't render expected elements (server slow or error)")
# Extract all scalar fields in one JS call
scalars = page.evaluate(
"""(ids) => {
const out = {};
for (const [k, id] of Object.entries(ids)) {
const el = document.getElementById(id);
out[k] = el ? (el.textContent || '').trim() : '';
}
return out;
}""",
_SCALAR_IDS,
)
# Extract sales history
sales_history = page.evaluate("""
() => {
const out = [];
document.querySelectorAll('table').forEach((tbl) => {
const hdrCells = tbl.querySelectorAll('tr')[0]?.querySelectorAll('th, td');
if (!hdrCells || hdrCells.length < 4) return;
const hdrText = Array.from(hdrCells).map(c => (c.textContent||'').trim().toLowerCase());
const isSalesHdr = hdrText.some(h => h.includes('sale date') || h.includes('book/page') || h.includes('deed instrument'));
if (!isSalesHdr) return;
const rows = tbl.querySelectorAll('tr');
for (let i = 1; i < rows.length; i++) {
const cells = rows[i].querySelectorAll('td');
if (cells.length < 4) continue;
const r = {};
cells.forEach((c, idx) => {
const h = hdrText[idx] || `col${idx}`;
r[h] = (c.textContent || '').trim();
});
if (Object.values(r).some(v => v && v.length > 0)) out.push(r);
}
});
return out;
}
""")
# Extract building area
building_area = page.evaluate("""
() => {
const out = {};
const grid = document.getElementById('ctl00_cphBody_repeaterBuilding_ctl00_gridBuildingArea');
if (!grid) return out;
const rows = grid.querySelectorAll('tr');
for (let i = 1; i < rows.length; i++) {
const cells = rows[i].querySelectorAll('td');
if (cells.length < 4) continue;
const type = (cells[0].textContent || '').trim();
const gross = (cells[1].textContent || '').trim();
const heated = (cells[2].textContent || '').trim();
const effective = (cells[3].textContent || '').trim();
if (type) out[type] = { gross, heated, effective };
}
return out;
}
""")
# Extract building attributes (beds, baths, stories)
attrs = page.evaluate("""
() => {
const out = {};
const grid = document.getElementById('ctl00_cphBody_repeaterBuilding_ctl00_gridBuildingAttributes');
if (!grid) return out;
const rows = grid.querySelectorAll('tr');
for (let i = 1; i < rows.length; i++) {
const cells = rows[i].querySelectorAll('td');
if (cells.length < 2) continue;
const element = (cells[0].textContent || '').trim();
const code = (cells[1].textContent || '').trim();
if (element) out[element] = code;
}
return out;
}
""")
# Extract building structural elements (roof, walls, flooring)
# NOTE: these come from the same building section, different grid
structural = page.evaluate("""
() => {
const out = {};
// Find any grid in building section with Element/Code/Detail headers
document.querySelectorAll('table').forEach((tbl) => {
const hdrs = tbl.querySelectorAll('tr')[0]?.querySelectorAll('th, td');
if (!hdrs) return;
const ht = Array.from(hdrs).map(c => (c.textContent||'').trim().toLowerCase());
if (!(ht.includes('element') && ht.includes('code') && ht.includes('detail'))) return;
// Skip the simpler attributes table (only 3 fields)
const rows = tbl.querySelectorAll('tr');
if (rows.length < 4) return;
for (let i = 1; i < rows.length; i++) {
const cells = rows[i].querySelectorAll('td');
if (cells.length < 3) continue;
const element = (cells[0].textContent || '').trim();
const detail = (cells[2].textContent || '').trim();
if (element && detail) {
if (out[element]) {
out[element] += '; ' + detail;
} else {
out[element] = detail;
}
}
}
});
return out;
}
""")
# Extract main property identity (RE#, subdivision, etc.) from top table
top_props = page.evaluate("""
() => {
const out = {};
document.querySelectorAll('table').forEach((tbl) => {
const rows = tbl.querySelectorAll('tr');
if (rows.length < 3) return;
// Top table has key:value rows (2 cells per row)
// Heuristic: first cell ends with ':' or matches known labels
const knownLabels = ['re #','re#','tax district','property use',
'# of buildings','legal desc','subdivision','total area'];
let matchCount = 0;
const candidate = {};
for (const tr of rows) {
const cells = tr.querySelectorAll('td, th');
if (cells.length !== 2) continue;
const k = (cells[0].textContent || '').trim().toLowerCase().replace(/:$/, '');
const v = (cells[1].textContent || '').trim();
if (k && v && knownLabels.some(kw => k.startsWith(kw))) {
matchCount++;
candidate[k] = v;
}
}
if (matchCount >= 3) {
Object.assign(out, candidate);
}
});
return out;
}
""")
# Land details
land = page.evaluate("""
() => {
const out = {};
const grid = document.getElementById('ctl00_cphBody_gridLand');
if (!grid) return out;
const rows = grid.querySelectorAll('tr');
if (rows.length < 2) return out;
const hdrs = rows[0].querySelectorAll('th, td');
const hdrText = Array.from(hdrs).map(c => (c.textContent||'').trim().toLowerCase());
const dataRow = rows[1].querySelectorAll('td');
hdrText.forEach((h, i) => {
if (dataRow[i]) out[h] = (dataRow[i].textContent || '').trim();
});
return out;
}
""")
# Extra features (fireplace, pool, deck, etc.)
features = page.evaluate("""
() => {
const out = [];
const grid = document.getElementById('ctl00_cphBody_gridExtraFeatures');
if (!grid) return out;
const rows = grid.querySelectorAll('tr');
for (let i = 1; i < rows.length; i++) {
const cells = rows[i].querySelectorAll('td');
if (cells.length < 5) continue;
out.push({
code: (cells[1]?.textContent || '').trim(),
description: (cells[2]?.textContent || '').trim(),
units: (cells[6]?.textContent || '').trim(),
value: (cells[7]?.textContent || '').trim(),
});
}
return out;
}
""")
browser.close()
# ─── Post-process ─────────────────────────────────────────────
result.update({k: _clean(v) for k, v in scalars.items()})
# Numeric conversions
for k in ("year_built",):
v = result.get(k, "")
if v:
result[k] = _to_int(v)
for k in ("building_value", "tax_last_year_just", "tax_last_year_assessed",
"tax_last_year_exemptions", "tax_last_year_taxable",
"tax_current_year_just", "tax_current_year_assessed",
"tax_current_year_exemptions", "tax_current_year_taxable",
"assessed_value_3", "taxable_value_school"):
v = result.get(k, "")
if v:
result[k] = _money_to_int(v)
# Parcel id / subdivision / etc from top props
result["parcel_id"] = top_props.get("re #", "") or top_props.get("re#", "")
result["tax_district"] = top_props.get("tax district", "")
result["property_use"] = top_props.get("property use", "")
result["num_buildings"] = top_props.get("# of buildings", "")
result["subdivision"] = top_props.get("subdivision", "")
result["lot_total_sqft"] = _to_int(top_props.get("total area", "") or "0")
# Building area summary
result["building_area_grid"] = building_area
result["sqft_heated"] = _to_int(
(building_area.get("Base Area") or {}).get("heated", "0") or
(building_area.get("Total") or {}).get("heated", "0") or "0"
)
result["sqft_gross"] = _to_int(
(building_area.get("Total") or {}).get("gross", "0") or "0"
)
result["sqft_garage"] = _to_int(
(building_area.get("Finished Garage") or {}).get("gross", "0") or "0"
)
# Attributes: beds/baths/stories
def _attr_to_num(s):
if not s:
return None
try:
return float(s.split(".")[0]) if "." in s else float(s)
except (ValueError, TypeError):
return None
result["bedrooms"] = _attr_to_num(attrs.get("Bedrooms", ""))
result["baths"] = _attr_to_num(attrs.get("Baths", ""))
result["stories"] = _attr_to_num(attrs.get("Stories", ""))
result["units"] = _attr_to_num(attrs.get("Rooms / Units", ""))
# Structural elements
result["exterior_wall"] = structural.get("Exterior Wall", "")
result["roof_struct"] = structural.get("Roof Struct", "")
result["roofing_cover"] = structural.get("Roofing Cover", "")
result["interior_wall"] = structural.get("Interior Wall", "")
result["int_flooring"] = structural.get("Int Flooring", "")
# Sales history normalized
result["sales_history"] = []
for s in sales_history:
record = {
"book_page": _clean(s.get("book/page", "")),
"date": _clean(s.get("sale date", "")),
"price": _money_to_int(s.get("sale price", "") or "0"),
"deed_type": _clean(s.get("deed instrument type code", "") or s.get("deed type", "")),
"qualified": _clean(s.get("qualified/unqualified", "") or s.get("qualified", "")),
"vacant_improved": _clean(s.get("vacant/improved", "")),
}
if any(record.values()):
result["sales_history"].append(record)
# Land details
result["land"] = {
"use_code": land.get("code", ""),
"use_description": land.get("use description", ""),
"zoning": land.get("zoning assessment", ""),
"front": land.get("front", ""),
"depth": land.get("depth", ""),
"land_units": land.get("land units", ""),
"land_type": land.get("land type", ""),
"land_value": _money_to_int(land.get("land value", "") or "0"),
}
# Extra features (fireplace, pool, etc.)
result["extra_features"] = features
# Homestead detection: exemptions >= $25K = primary residence with HX
ex_last = result.get("tax_last_year_exemptions") or 0
ex_curr = result.get("tax_current_year_exemptions") or 0
result["homestead_active"] = (ex_last >= 25000) or (ex_curr >= 25000)
result["homestead_amount_current"] = ex_curr
result["homestead_amount_last"] = ex_last
# Convenience: most recent qualified sale price
qualified_sales = [s for s in result["sales_history"]
if s.get("qualified", "").lower().startswith("qualified")
and s.get("price", 0) and s["price"] >= 1000]
result["most_recent_qualified_sale"] = qualified_sales[0] if qualified_sales else None
# Effective renovation signal:
# If most recent qualified sale price >> previous qualified sale price by
# >30% within 24 months → likely renovated/flipped.
renov_signal = _detect_renovation_pattern(
result["sales_history"], listing_price=listing_price,
)
result["renovation_signal"] = renov_signal
except PWTimeout as e:
result["errors"].append(f"timeout: {e}")
except Exception as e:
import traceback
result["errors"].append(f"{type(e).__name__}: {e}")
result["_trace"] = traceback.format_exc()[:600]
return result
# ════════════════════════════════════════════════════════════════════════════
# Helpers — server retry / detail link extraction
# ════════════════════════════════════════════════════════════════════════════
def _extract_detail_href_with_retry(page, max_retries: int = 2) -> Optional[str]:
"""Wait for Detail.aspx link on Results page, retry on server errors.
Duval PA returns intermittent 500 errors ("wait operation timed out")
when rate-limited. Retry with backoff handles that.
"""
for attempt in range(max_retries + 1):
# Wait for results to render
time.sleep(3)
try:
page.wait_for_selector("a[href*='Detail.aspx']", state="attached", timeout=15000)
except Exception:
pass
href = page.evaluate("""
() => {
const links = document.querySelectorAll("a[href*='Detail.aspx']");
return links.length > 0 ? links[0].getAttribute('href') : null;
}
""")
if href:
return href
# Check if this is a server error page
body = page.inner_text("body")[:500]
is_server_error = (
"Server Error" in body or
"wait operation timed out" in body or
"Runtime Error" in body
)
if is_server_error and attempt < max_retries:
# Backoff and retry — reload the search
backoff = 5 * (attempt + 1)
time.sleep(backoff)
try:
page.reload(wait_until="load", timeout=30000)
except Exception:
pass
continue
# If not server error, the link just isn't there — return None
return None
return None
# ════════════════════════════════════════════════════════════════════════════
# Helpers
# ════════════════════════════════════════════════════════════════════════════
def _clean(s) -> str:
if not isinstance(s, str):
return s
return re.sub(r"\s+", " ", s).strip()
def _to_int(s) -> Optional[int]:
if not s:
return None
cleaned = re.sub(r"[^\d-]", "", str(s))
try:
return int(cleaned) if cleaned else None
except ValueError:
return None
def _money_to_int(s) -> Optional[int]:
if not s:
return None
cleaned = re.sub(r"[^\d.-]", "", str(s))
if not cleaned or cleaned == "-":
return None
try:
return int(float(cleaned))
except ValueError:
return None
def _detect_renovation_pattern(sales: list[dict], listing_price: Optional[float] = None) -> dict:
"""Heuristic: detect flip / renovation / flip-in-progress patterns.
Args:
sales: sales_history (recent first)
listing_price: optional current listing price — habilita flip-in-progress detection
Returns:
{
"is_flip_pattern": bool, # qualified sales historical flip detected
"is_flip_in_progress": bool, # NEW: recent qualified << current listing
"evidence": str,
"most_recent_qualified": dict | None,
"prior_qualified": dict | None,
"value_increase_pct": float | None,
"months_between": int | None,
"listing_premium_pct": float | None, # NEW: (listing - recent_qualified) / recent_qualified * 100
"months_since_recent_sale": int | None,
"interpretation_es": str | None,
}
"""
out = {
"is_flip_pattern": False,
"is_flip_in_progress": False,
"evidence": "",
"most_recent_qualified": None,
"prior_qualified": None,
"value_increase_pct": None,
"months_between": None,
"listing_premium_pct": None,
"months_since_recent_sale": None,
"interpretation_es": None,
}
qualified = [s for s in sales
if s.get("qualified", "").lower().startswith("qualified")
and s.get("price", 0) and s["price"] >= 1000]
if not qualified:
return out
recent = qualified[0]
out["most_recent_qualified"] = recent
# ─── Pattern A: historical flip (prior qualified → recent qualified) ─────
if len(qualified) >= 2:
prior = qualified[1]
out["prior_qualified"] = prior
try:
increase = (recent["price"] - prior["price"]) / prior["price"] * 100
out["value_increase_pct"] = round(increase, 1)
except (TypeError, ZeroDivisionError):
pass
try:
d1 = datetime.strptime(recent["date"], "%m/%d/%Y")
d2 = datetime.strptime(prior["date"], "%m/%d/%Y")
months = abs((d1 - d2).days) // 30
out["months_between"] = months
except (ValueError, TypeError, KeyError):
pass
if out["value_increase_pct"] and out["months_between"]:
if out["value_increase_pct"] >= 25 and out["months_between"] <= 30:
out["is_flip_pattern"] = True
out["evidence"] = (
f"+{out['value_increase_pct']}% in {out['months_between']} months "
f"({prior['date']} ${prior['price']:,} -> {recent['date']} ${recent['price']:,})"
)
# ─── Pattern B: FLIP-IN-PROGRESS (recent qualified << current listing) ──
if listing_price and listing_price > 0 and recent.get("price", 0) > 0:
try:
premium = (listing_price - recent["price"]) / recent["price"] * 100
out["listing_premium_pct"] = round(premium, 1)
except (TypeError, ZeroDivisionError):
pass
try:
d_recent = datetime.strptime(recent["date"], "%m/%d/%Y")
today = datetime.now()
months_since = abs((today - d_recent).days) // 30
out["months_since_recent_sale"] = months_since
except (ValueError, TypeError, KeyError):
pass
# Flip-in-progress: recent qualified sale is 15%+ below listing AND
# the sale was within last 18 months (typical flip turnaround)
if (out["listing_premium_pct"] and out["listing_premium_pct"] >= 15
and out["months_since_recent_sale"] is not None
and out["months_since_recent_sale"] <= 18):
out["is_flip_in_progress"] = True
if out["evidence"]:
out["evidence"] += " | "
out["evidence"] += (
f"FLIP-IN-PROGRESS: owner bought ${recent['price']:,} on {recent['date']} "
f"({out['months_since_recent_sale']}mo ago), listing ${listing_price:,.0f} "
f"(+{out['listing_premium_pct']}%)"
)
# ─── Spanish interpretation ─────────────────────────────────────────────
if out["is_flip_in_progress"] and out["is_flip_pattern"]:
out["interpretation_es"] = (
"PATRON DE FLIP REPETIDO: la propiedad ya fue flipped una vez en el "
"historial. El owner actual la compro reciente y la lista mucho mas "
"alto. Probable renovacion reciente -> precio refleja inversion. "
"Si comprador final, esperate negociacion dura del owner (necesita "
"recuperar costos de rehab + margen)."
)
elif out["is_flip_in_progress"]:
out["interpretation_es"] = (
f"FLIP-IN-PROGRESS: el owner compro hace {out['months_since_recent_sale']}mo "
f"a ${recent['price']:,} y lista a ${listing_price:,.0f} (+{out['listing_premium_pct']:.0f}%). "
"Probable renovacion en el medio. Precio incluye trabajo. Negociar dificil — "
"owner tiene 'sunk cost' del rehab. Validar condicion real con inspeccion."
)
elif out["is_flip_pattern"]:
out["interpretation_es"] = (
f"HISTORIAL DE FLIP: la propiedad subio +{out['value_increase_pct']}% en "
f"{out['months_between']}mo (sale prior). Indica renovacion previa."
)
return out
# ════════════════════════════════════════════════════════════════════════════
# CLI
# ════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
import argparse
import json
parser = argparse.ArgumentParser(description="Duval PA full record fetcher")
parser.add_argument("--address", help="Street address (e.g. '2352 SCENIC VIEW CT')")
parser.add_argument("--parcel", help="RE# (e.g. '044273-0370')")
parser.add_argument("--zip", help="Optional ZIP filter")
args = parser.parse_args()
if not args.address and not args.parcel:
parser.error("--address or --parcel required")
rec = fetch_duval_pa_record(
address=args.address, parcel_id=args.parcel, zip_code=args.zip,
)
print(json.dumps(rec, indent=2, default=str))