"""data_fetchers/pa_duval.py — Full Duval County Property Appraiser extractor. Sitio: https://paopropertysearch.coj.net (ASP.NET WebForms) Flow: Search.aspx → Results.aspx → Detail.aspx?ParcelNumber=XXX Extrae todo lo publico del Duval PA para construir un Property Snapshot Report: - Owner name(s) - Property address + subdivision + legal description - Building: type, year_built, sqft heated/total, bedrooms, bathrooms, exterior wall, roof type, interior flooring - Values: just/market, assessed, exemptions (3-year history) - Tax breakdown por taxing district - Sales history completa (book/page, date, price, deed type, qualified status) - Homestead exemption (key signal: owner-occupant vs investor) - Land details (zoning, lot size, use code) - Extra features (fireplace, pool, etc.) USAGE: from data_fetchers.pa_duval import fetch_duval_pa_record rec = fetch_duval_pa_record(address="2352 SCENIC VIEW CT", zip_code="32218") # rec["year_built"], rec["sales_history"], rec["homestead_active"]... TECHNICAL: - ASP.NET WebForms con WebForm_DoPostBackWithOptions (compat IE8) - Element IDs ESTABLES (no autogenerados) - Per-search latency: ~10-15s (entry → search → results → detail) - Free (Playwright local, no API cost) """ from __future__ import annotations import re import time from datetime import datetime, timezone from typing import Optional USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131" _BASE_URL = "https://paopropertysearch.coj.net" _SEARCH_URL = f"{_BASE_URL}/Basic/Search.aspx" # Address parser para Duval ASP.NET form fields. # Acepta long form (COURT, STREET) y abbreviation (CT, ST). Strip city/state/zip # antes de parsear (split por primera coma). _ADDRESS_RE = re.compile( r"^\s*(?P\d+)\s+" r"(?:(?PN|S|E|W|NE|NW|SE|SW)\s+)?" r"(?P[A-Z][A-Z\s\d\-']*?)" r"\s+(?P" r"ST|STREET|AVE|AV|AVENUE|RD|ROAD|DR|DRIVE|CT|COURT|CIR|CIRCLE|" r"LN|LANE|BLVD|BOULEVARD|HWY|HIGHWAY|WAY|PL|PLACE|PKY|PKWY|PARKWAY|" r"TRL|TRAIL|TER|TERRACE|LOOP|RUN|ALY|ALLEY|XING|CROSSING" r")\b", re.IGNORECASE, ) # Map long form → ASP.NET ddStreetSuffix value _SUFFIX_NORMALIZE = { "STREET": "ST", "AVENUE": "AVE", "AV": "AVE", "ROAD": "RD", "DRIVE": "DR", "COURT": "CT", "CIRCLE": "CIR", "LANE": "LN", "BOULEVARD": "BLVD", "HIGHWAY": "HWY", "PLACE": "PL", "PARKWAY": "PKWY", "PKY": "PKWY", "TRAIL": "TRL", "TERRACE": "TER", "ALLEY": "ALY", "CROSSING": "XING", } # ════════════════════════════════════════════════════════════════════════════ # Field ID mapping (confirmed via probe on 2352 SCENIC VIEW CT) # ════════════════════════════════════════════════════════════════════════════ _SCALAR_IDS = { "owner_name": "ctl00_cphBody_repeaterOwnerInformation_ctl00_lblOwnerName", "site_address_line1": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingSiteAddressLine1", "site_address_line2": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingSiteAddressLine2", "building_type": "ctl00_cphBody_repeaterBuilding_ctl00_lblBuildingType", "year_built": "ctl00_cphBody_repeaterBuilding_ctl00_lblYearBuilt", "building_value": "ctl00_cphBody_repeaterBuilding_ctl00_lblBldgValue", # Tax values current + last year "tax_last_year_just": "ctl00_cphBody_lblTaxLastYearJustValue", "tax_last_year_assessed": "ctl00_cphBody_lblTaxLastYearAssessedValue", "tax_last_year_exemptions": "ctl00_cphBody_lblTaxLastYearExemptions", "tax_last_year_taxable": "ctl00_cphBody_lblTaxLastYearTaxableValue", "tax_current_year_just": "ctl00_cphBody_lblTaxCurrentYearJustValue", "tax_current_year_assessed": "ctl00_cphBody_lblTaxCurrentYearAssessedValue", "tax_current_year_exemptions": "ctl00_cphBody_lblTaxCurrentYearExemptions", "tax_current_year_taxable": "ctl00_cphBody_lblTaxCurrentYearTaxableValue", # Values from main values table (no current "in progress" year) "assessed_value_3": "ctl00_cphBody_lblAssessedValue3", "taxable_value_school": "ctl00_cphBody_lblTaxableValueSchool", } def _parse_address(address: str) -> Optional[dict]: """Parse address into Duval form fields. Acepta: '2352 SCENIC VIEW CT' → simple '2352 SCENIC VIEW Court, Jacksonville, FL 32218' → con city/state/zip '123 N MAIN ST, Anytown, FL' → con prefix """ if not address: return None # Strip city/state/zip — toma solo lo antes del primer comma street_only = address.split(",")[0].strip().upper() m = _ADDRESS_RE.search(street_only) if not m: return None suffix_raw = (m.group("suffix") or "").strip().upper() suffix_normalized = _SUFFIX_NORMALIZE.get(suffix_raw, suffix_raw) if suffix_raw else None return { "street_num": m.group("num"), "prefix": (m.group("prefix") or "").strip().upper() or None, "name": m.group("name").strip(), "suffix": suffix_normalized, } # ════════════════════════════════════════════════════════════════════════════ # Public API # ════════════════════════════════════════════════════════════════════════════ def fetch_duval_pa_record( address: Optional[str] = None, parcel_id: Optional[str] = None, zip_code: Optional[str] = None, timeout_seconds: int = 30, listing_price: Optional[float] = None, ) -> dict: """Fetch full Duval PA record by address OR parcel_id (RE#). Args: address: street address (e.g. "2352 SCENIC VIEW CT") parcel_id: Duval RE# (e.g. "044273-0370") — preferred si lo tenes zip_code: optional zip filter timeout_seconds: max wait per Playwright op Returns: Dict con TODOS los campos publicos. Si fallo, dict tiene 'errors'. Key fields: - owner_name, owner_full_address (mailing) - site_address, parcel_id (RE#), subdivision - year_built (ENTERO), building_type, sqft_heated, sqft_gross, sqft_garage, bedrooms, baths, stories - exterior_wall, roof_struct, roofing_cover, interior_wall, int_flooring - just_value_current, assessed_value_current, taxable_current, exemption_current - just_value_last, assessed_value_last, taxes_billed_last - homestead_active (bool — exemptions >= $25,000 = homestead) - sales_history: [{date, price, deed_type, qualified, book_page, vacant_improved}, ...] - extra_features: [{code, description, value}, ...] - land: {zoning, use_code, lot_acres, lot_total_sqft} - legal_description: str - tax_breakdown: [{district, assessed, exempt, taxable, tax_amt}, ...] - source_url: str (detail page URL) - fetched_at: ISO timestamp """ fetched_at = datetime.now(timezone.utc).isoformat() result = { "county": "Duval", "source": "Duval Property Appraiser (paopropertysearch.coj.net)", "fetched_at": fetched_at, "errors": [], } if not address and not parcel_id: result["errors"].append("no address or parcel_id provided") return result try: from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout except ImportError: result["errors"].append("playwright not installed") return result parsed_addr = _parse_address(address) if address else None if address and not parsed_addr: result["errors"].append(f"could not parse address '{address}' (need format: 'NUM [PREFIX] NAME SUFFIX')") return result try: with sync_playwright() as p: browser = p.chromium.launch(headless=True) ctx = browser.new_context(user_agent=USER_AGENT) page = ctx.new_page() page.set_default_timeout(timeout_seconds * 1000) page.goto(_SEARCH_URL, wait_until="load", timeout=timeout_seconds * 1000) time.sleep(2) if parcel_id: pid_clean = parcel_id.replace("-", "").strip() detail_url = f"{_BASE_URL}/Basic/Detail.aspx?RE={pid_clean}" try: page.goto(detail_url, wait_until="load", timeout=timeout_seconds * 1000) except Exception: # If 'load' times out, fall back to 'commit' (page has navigated) page.goto(detail_url, wait_until="commit", timeout=timeout_seconds * 1000) time.sleep(5) else: # Search by address fields page.locator("#ctl00_cphBody_tbStreetNumber").fill(parsed_addr["street_num"]) if parsed_addr["prefix"]: try: page.locator("#ctl00_cphBody_ddStreetPrefix").select_option(value=parsed_addr["prefix"]) except Exception: pass page.locator("#ctl00_cphBody_tbStreetName").fill(parsed_addr["name"]) if parsed_addr["suffix"]: try: page.locator("#ctl00_cphBody_ddStreetSuffix").select_option(value=parsed_addr["suffix"]) except Exception: pass if zip_code: try: page.locator("#ctl00_cphBody_tbZipCode").fill(zip_code) except Exception: pass page.locator("#ctl00_cphBody_bSearch").click() page.wait_for_timeout(4000) # If results table → extract href from first row link and # navigate directly (Playwright click + navigation hangs on ASP.NET) rs_url = page.url if "Results.aspx" in rs_url: detail_href = _extract_detail_href_with_retry(page, max_retries=2) if not detail_href: body_preview = page.inner_text("body")[:300].replace("\n", " ") result["errors"].append( f"results page returned but no Detail.aspx link found " f"(url={page.url}, body_preview={body_preview!r})" ) browser.close() return result # Build absolute URL and navigate directly (no click) if detail_href.startswith("/"): detail_url = f"{_BASE_URL}{detail_href}" elif detail_href.startswith("http"): detail_url = detail_href else: detail_url = f"{_BASE_URL}/Basic/{detail_href}" try: page.goto(detail_url, wait_until="load", timeout=timeout_seconds * 1000) except Exception: page.goto(detail_url, wait_until="commit", timeout=timeout_seconds * 1000) time.sleep(5) elif "Detail.aspx" not in page.url: result["errors"].append(f"unexpected URL after search: {page.url}") browser.close() return result # We should now be on Detail.aspx if "Detail.aspx" not in page.url: result["errors"].append(f"failed to reach Detail page, URL: {page.url}") browser.close() return result result["source_url"] = page.url # Wait for KEY element to confirm full render before extracting. # Retry once on server error (Duval intermittent 500s). render_ok = False for attempt in range(3): try: page.wait_for_selector( "#ctl00_cphBody_repeaterBuilding_ctl00_lblYearBuilt", state="attached", timeout=20000, ) render_ok = True break except Exception: # Try owner sentinel try: page.wait_for_selector( "#ctl00_cphBody_repeaterOwnerInformation_ctl00_lblOwnerName", state="attached", timeout=10000, ) render_ok = True break except Exception: # Check if server error → retry body = page.inner_text("body")[:500] if "Server Error" in body or "Runtime Error" in body: if attempt < 2: time.sleep(8 * (attempt + 1)) try: page.reload(wait_until="load", timeout=30000) except Exception: pass continue break if not render_ok: result["errors"].append("detail page didn't render expected elements (server slow or error)") # Extract all scalar fields in one JS call scalars = page.evaluate( """(ids) => { const out = {}; for (const [k, id] of Object.entries(ids)) { const el = document.getElementById(id); out[k] = el ? (el.textContent || '').trim() : ''; } return out; }""", _SCALAR_IDS, ) # Extract sales history sales_history = page.evaluate(""" () => { const out = []; document.querySelectorAll('table').forEach((tbl) => { const hdrCells = tbl.querySelectorAll('tr')[0]?.querySelectorAll('th, td'); if (!hdrCells || hdrCells.length < 4) return; const hdrText = Array.from(hdrCells).map(c => (c.textContent||'').trim().toLowerCase()); const isSalesHdr = hdrText.some(h => h.includes('sale date') || h.includes('book/page') || h.includes('deed instrument')); if (!isSalesHdr) return; const rows = tbl.querySelectorAll('tr'); for (let i = 1; i < rows.length; i++) { const cells = rows[i].querySelectorAll('td'); if (cells.length < 4) continue; const r = {}; cells.forEach((c, idx) => { const h = hdrText[idx] || `col${idx}`; r[h] = (c.textContent || '').trim(); }); if (Object.values(r).some(v => v && v.length > 0)) out.push(r); } }); return out; } """) # Extract building area building_area = page.evaluate(""" () => { const out = {}; const grid = document.getElementById('ctl00_cphBody_repeaterBuilding_ctl00_gridBuildingArea'); if (!grid) return out; const rows = grid.querySelectorAll('tr'); for (let i = 1; i < rows.length; i++) { const cells = rows[i].querySelectorAll('td'); if (cells.length < 4) continue; const type = (cells[0].textContent || '').trim(); const gross = (cells[1].textContent || '').trim(); const heated = (cells[2].textContent || '').trim(); const effective = (cells[3].textContent || '').trim(); if (type) out[type] = { gross, heated, effective }; } return out; } """) # Extract building attributes (beds, baths, stories) attrs = page.evaluate(""" () => { const out = {}; const grid = document.getElementById('ctl00_cphBody_repeaterBuilding_ctl00_gridBuildingAttributes'); if (!grid) return out; const rows = grid.querySelectorAll('tr'); for (let i = 1; i < rows.length; i++) { const cells = rows[i].querySelectorAll('td'); if (cells.length < 2) continue; const element = (cells[0].textContent || '').trim(); const code = (cells[1].textContent || '').trim(); if (element) out[element] = code; } return out; } """) # Extract building structural elements (roof, walls, flooring) # NOTE: these come from the same building section, different grid structural = page.evaluate(""" () => { const out = {}; // Find any grid in building section with Element/Code/Detail headers document.querySelectorAll('table').forEach((tbl) => { const hdrs = tbl.querySelectorAll('tr')[0]?.querySelectorAll('th, td'); if (!hdrs) return; const ht = Array.from(hdrs).map(c => (c.textContent||'').trim().toLowerCase()); if (!(ht.includes('element') && ht.includes('code') && ht.includes('detail'))) return; // Skip the simpler attributes table (only 3 fields) const rows = tbl.querySelectorAll('tr'); if (rows.length < 4) return; for (let i = 1; i < rows.length; i++) { const cells = rows[i].querySelectorAll('td'); if (cells.length < 3) continue; const element = (cells[0].textContent || '').trim(); const detail = (cells[2].textContent || '').trim(); if (element && detail) { if (out[element]) { out[element] += '; ' + detail; } else { out[element] = detail; } } } }); return out; } """) # Extract main property identity (RE#, subdivision, etc.) from top table top_props = page.evaluate(""" () => { const out = {}; document.querySelectorAll('table').forEach((tbl) => { const rows = tbl.querySelectorAll('tr'); if (rows.length < 3) return; // Top table has key:value rows (2 cells per row) // Heuristic: first cell ends with ':' or matches known labels const knownLabels = ['re #','re#','tax district','property use', '# of buildings','legal desc','subdivision','total area']; let matchCount = 0; const candidate = {}; for (const tr of rows) { const cells = tr.querySelectorAll('td, th'); if (cells.length !== 2) continue; const k = (cells[0].textContent || '').trim().toLowerCase().replace(/:$/, ''); const v = (cells[1].textContent || '').trim(); if (k && v && knownLabels.some(kw => k.startsWith(kw))) { matchCount++; candidate[k] = v; } } if (matchCount >= 3) { Object.assign(out, candidate); } }); return out; } """) # Land details land = page.evaluate(""" () => { const out = {}; const grid = document.getElementById('ctl00_cphBody_gridLand'); if (!grid) return out; const rows = grid.querySelectorAll('tr'); if (rows.length < 2) return out; const hdrs = rows[0].querySelectorAll('th, td'); const hdrText = Array.from(hdrs).map(c => (c.textContent||'').trim().toLowerCase()); const dataRow = rows[1].querySelectorAll('td'); hdrText.forEach((h, i) => { if (dataRow[i]) out[h] = (dataRow[i].textContent || '').trim(); }); return out; } """) # Extra features (fireplace, pool, deck, etc.) features = page.evaluate(""" () => { const out = []; const grid = document.getElementById('ctl00_cphBody_gridExtraFeatures'); if (!grid) return out; const rows = grid.querySelectorAll('tr'); for (let i = 1; i < rows.length; i++) { const cells = rows[i].querySelectorAll('td'); if (cells.length < 5) continue; out.push({ code: (cells[1]?.textContent || '').trim(), description: (cells[2]?.textContent || '').trim(), units: (cells[6]?.textContent || '').trim(), value: (cells[7]?.textContent || '').trim(), }); } return out; } """) browser.close() # ─── Post-process ───────────────────────────────────────────── result.update({k: _clean(v) for k, v in scalars.items()}) # Numeric conversions for k in ("year_built",): v = result.get(k, "") if v: result[k] = _to_int(v) for k in ("building_value", "tax_last_year_just", "tax_last_year_assessed", "tax_last_year_exemptions", "tax_last_year_taxable", "tax_current_year_just", "tax_current_year_assessed", "tax_current_year_exemptions", "tax_current_year_taxable", "assessed_value_3", "taxable_value_school"): v = result.get(k, "") if v: result[k] = _money_to_int(v) # Parcel id / subdivision / etc from top props result["parcel_id"] = top_props.get("re #", "") or top_props.get("re#", "") result["tax_district"] = top_props.get("tax district", "") result["property_use"] = top_props.get("property use", "") result["num_buildings"] = top_props.get("# of buildings", "") result["subdivision"] = top_props.get("subdivision", "") result["lot_total_sqft"] = _to_int(top_props.get("total area", "") or "0") # Building area summary result["building_area_grid"] = building_area result["sqft_heated"] = _to_int( (building_area.get("Base Area") or {}).get("heated", "0") or (building_area.get("Total") or {}).get("heated", "0") or "0" ) result["sqft_gross"] = _to_int( (building_area.get("Total") or {}).get("gross", "0") or "0" ) result["sqft_garage"] = _to_int( (building_area.get("Finished Garage") or {}).get("gross", "0") or "0" ) # Attributes: beds/baths/stories def _attr_to_num(s): if not s: return None try: return float(s.split(".")[0]) if "." in s else float(s) except (ValueError, TypeError): return None result["bedrooms"] = _attr_to_num(attrs.get("Bedrooms", "")) result["baths"] = _attr_to_num(attrs.get("Baths", "")) result["stories"] = _attr_to_num(attrs.get("Stories", "")) result["units"] = _attr_to_num(attrs.get("Rooms / Units", "")) # Structural elements result["exterior_wall"] = structural.get("Exterior Wall", "") result["roof_struct"] = structural.get("Roof Struct", "") result["roofing_cover"] = structural.get("Roofing Cover", "") result["interior_wall"] = structural.get("Interior Wall", "") result["int_flooring"] = structural.get("Int Flooring", "") # Sales history normalized result["sales_history"] = [] for s in sales_history: record = { "book_page": _clean(s.get("book/page", "")), "date": _clean(s.get("sale date", "")), "price": _money_to_int(s.get("sale price", "") or "0"), "deed_type": _clean(s.get("deed instrument type code", "") or s.get("deed type", "")), "qualified": _clean(s.get("qualified/unqualified", "") or s.get("qualified", "")), "vacant_improved": _clean(s.get("vacant/improved", "")), } if any(record.values()): result["sales_history"].append(record) # Land details result["land"] = { "use_code": land.get("code", ""), "use_description": land.get("use description", ""), "zoning": land.get("zoning assessment", ""), "front": land.get("front", ""), "depth": land.get("depth", ""), "land_units": land.get("land units", ""), "land_type": land.get("land type", ""), "land_value": _money_to_int(land.get("land value", "") or "0"), } # Extra features (fireplace, pool, etc.) result["extra_features"] = features # Homestead detection: exemptions >= $25K = primary residence with HX ex_last = result.get("tax_last_year_exemptions") or 0 ex_curr = result.get("tax_current_year_exemptions") or 0 result["homestead_active"] = (ex_last >= 25000) or (ex_curr >= 25000) result["homestead_amount_current"] = ex_curr result["homestead_amount_last"] = ex_last # Convenience: most recent qualified sale price qualified_sales = [s for s in result["sales_history"] if s.get("qualified", "").lower().startswith("qualified") and s.get("price", 0) and s["price"] >= 1000] result["most_recent_qualified_sale"] = qualified_sales[0] if qualified_sales else None # Effective renovation signal: # If most recent qualified sale price >> previous qualified sale price by # >30% within 24 months → likely renovated/flipped. renov_signal = _detect_renovation_pattern( result["sales_history"], listing_price=listing_price, ) result["renovation_signal"] = renov_signal except PWTimeout as e: result["errors"].append(f"timeout: {e}") except Exception as e: import traceback result["errors"].append(f"{type(e).__name__}: {e}") result["_trace"] = traceback.format_exc()[:600] return result # ════════════════════════════════════════════════════════════════════════════ # Helpers — server retry / detail link extraction # ════════════════════════════════════════════════════════════════════════════ def _extract_detail_href_with_retry(page, max_retries: int = 2) -> Optional[str]: """Wait for Detail.aspx link on Results page, retry on server errors. Duval PA returns intermittent 500 errors ("wait operation timed out") when rate-limited. Retry with backoff handles that. """ for attempt in range(max_retries + 1): # Wait for results to render time.sleep(3) try: page.wait_for_selector("a[href*='Detail.aspx']", state="attached", timeout=15000) except Exception: pass href = page.evaluate(""" () => { const links = document.querySelectorAll("a[href*='Detail.aspx']"); return links.length > 0 ? links[0].getAttribute('href') : null; } """) if href: return href # Check if this is a server error page body = page.inner_text("body")[:500] is_server_error = ( "Server Error" in body or "wait operation timed out" in body or "Runtime Error" in body ) if is_server_error and attempt < max_retries: # Backoff and retry — reload the search backoff = 5 * (attempt + 1) time.sleep(backoff) try: page.reload(wait_until="load", timeout=30000) except Exception: pass continue # If not server error, the link just isn't there — return None return None return None # ════════════════════════════════════════════════════════════════════════════ # Helpers # ════════════════════════════════════════════════════════════════════════════ def _clean(s) -> str: if not isinstance(s, str): return s return re.sub(r"\s+", " ", s).strip() def _to_int(s) -> Optional[int]: if not s: return None cleaned = re.sub(r"[^\d-]", "", str(s)) try: return int(cleaned) if cleaned else None except ValueError: return None def _money_to_int(s) -> Optional[int]: if not s: return None cleaned = re.sub(r"[^\d.-]", "", str(s)) if not cleaned or cleaned == "-": return None try: return int(float(cleaned)) except ValueError: return None def _detect_renovation_pattern(sales: list[dict], listing_price: Optional[float] = None) -> dict: """Heuristic: detect flip / renovation / flip-in-progress patterns. Args: sales: sales_history (recent first) listing_price: optional current listing price — habilita flip-in-progress detection Returns: { "is_flip_pattern": bool, # qualified sales historical flip detected "is_flip_in_progress": bool, # NEW: recent qualified << current listing "evidence": str, "most_recent_qualified": dict | None, "prior_qualified": dict | None, "value_increase_pct": float | None, "months_between": int | None, "listing_premium_pct": float | None, # NEW: (listing - recent_qualified) / recent_qualified * 100 "months_since_recent_sale": int | None, "interpretation_es": str | None, } """ out = { "is_flip_pattern": False, "is_flip_in_progress": False, "evidence": "", "most_recent_qualified": None, "prior_qualified": None, "value_increase_pct": None, "months_between": None, "listing_premium_pct": None, "months_since_recent_sale": None, "interpretation_es": None, } qualified = [s for s in sales if s.get("qualified", "").lower().startswith("qualified") and s.get("price", 0) and s["price"] >= 1000] if not qualified: return out recent = qualified[0] out["most_recent_qualified"] = recent # ─── Pattern A: historical flip (prior qualified → recent qualified) ───── if len(qualified) >= 2: prior = qualified[1] out["prior_qualified"] = prior try: increase = (recent["price"] - prior["price"]) / prior["price"] * 100 out["value_increase_pct"] = round(increase, 1) except (TypeError, ZeroDivisionError): pass try: d1 = datetime.strptime(recent["date"], "%m/%d/%Y") d2 = datetime.strptime(prior["date"], "%m/%d/%Y") months = abs((d1 - d2).days) // 30 out["months_between"] = months except (ValueError, TypeError, KeyError): pass if out["value_increase_pct"] and out["months_between"]: if out["value_increase_pct"] >= 25 and out["months_between"] <= 30: out["is_flip_pattern"] = True out["evidence"] = ( f"+{out['value_increase_pct']}% in {out['months_between']} months " f"({prior['date']} ${prior['price']:,} -> {recent['date']} ${recent['price']:,})" ) # ─── Pattern B: FLIP-IN-PROGRESS (recent qualified << current listing) ── if listing_price and listing_price > 0 and recent.get("price", 0) > 0: try: premium = (listing_price - recent["price"]) / recent["price"] * 100 out["listing_premium_pct"] = round(premium, 1) except (TypeError, ZeroDivisionError): pass try: d_recent = datetime.strptime(recent["date"], "%m/%d/%Y") today = datetime.now() months_since = abs((today - d_recent).days) // 30 out["months_since_recent_sale"] = months_since except (ValueError, TypeError, KeyError): pass # Flip-in-progress: recent qualified sale is 15%+ below listing AND # the sale was within last 18 months (typical flip turnaround) if (out["listing_premium_pct"] and out["listing_premium_pct"] >= 15 and out["months_since_recent_sale"] is not None and out["months_since_recent_sale"] <= 18): out["is_flip_in_progress"] = True if out["evidence"]: out["evidence"] += " | " out["evidence"] += ( f"FLIP-IN-PROGRESS: owner bought ${recent['price']:,} on {recent['date']} " f"({out['months_since_recent_sale']}mo ago), listing ${listing_price:,.0f} " f"(+{out['listing_premium_pct']}%)" ) # ─── Spanish interpretation ───────────────────────────────────────────── if out["is_flip_in_progress"] and out["is_flip_pattern"]: out["interpretation_es"] = ( "PATRON DE FLIP REPETIDO: la propiedad ya fue flipped una vez en el " "historial. El owner actual la compro reciente y la lista mucho mas " "alto. Probable renovacion reciente -> precio refleja inversion. " "Si comprador final, esperate negociacion dura del owner (necesita " "recuperar costos de rehab + margen)." ) elif out["is_flip_in_progress"]: out["interpretation_es"] = ( f"FLIP-IN-PROGRESS: el owner compro hace {out['months_since_recent_sale']}mo " f"a ${recent['price']:,} y lista a ${listing_price:,.0f} (+{out['listing_premium_pct']:.0f}%). " "Probable renovacion en el medio. Precio incluye trabajo. Negociar dificil — " "owner tiene 'sunk cost' del rehab. Validar condicion real con inspeccion." ) elif out["is_flip_pattern"]: out["interpretation_es"] = ( f"HISTORIAL DE FLIP: la propiedad subio +{out['value_increase_pct']}% en " f"{out['months_between']}mo (sale prior). Indica renovacion previa." ) return out # ════════════════════════════════════════════════════════════════════════════ # CLI # ════════════════════════════════════════════════════════════════════════════ if __name__ == "__main__": import argparse import json parser = argparse.ArgumentParser(description="Duval PA full record fetcher") parser.add_argument("--address", help="Street address (e.g. '2352 SCENIC VIEW CT')") parser.add_argument("--parcel", help="RE# (e.g. '044273-0370')") parser.add_argument("--zip", help="Optional ZIP filter") args = parser.parse_args() if not args.address and not args.parcel: parser.error("--address or --parcel required") rec = fetch_duval_pa_record( address=args.address, parcel_id=args.parcel, zip_code=args.zip, ) print(json.dumps(rec, indent=2, default=str))