"""data_fetchers/pa_miami_dade.py — Full Miami-Dade PA extractor.

Site: https://apps.miamidadepa.gov/PropertySearch/ (Angular 14 + Kendo UI)
Deep link: /PropertySearch/#/?folio={folio_no_dashes}

Extracts all public data from the Miami-Dade PA via the Angular components:
- pa-propertyinformation: folio, sub-division, address, owner, mailing,
  PA primary zone, primary land use, beds/baths/half, floors, living units,
  living area, adjusted area, lot size, year built
- pa-salesinformation: sales history (date, price, OR book-page, qualification,
  previous owner)
- pa-assessmentinformation: land/building/extra/market/assessed values for 3 years
- pa-taxablevalueinformation: COUNTY/SCHOOL/etc exemption + taxable
- pa-benefitsinformation: homestead + other exemptions
- pa-legaldescription: full legal description

USAGE:
    from data_fetchers.pa_miami_dade import fetch_miami_dade_pa_record
    rec = fetch_miami_dade_pa_record(parcel_id="31-2202-034-2470")
    # rec["owner_name"], rec["year_built"], rec["sales_history"]...
"""
from __future__ import annotations

import re
import time
from datetime import datetime, timezone
from typing import Optional


USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131"
_BASE_URL = "https://apps.miamidadepa.gov/PropertySearch"


# ════════════════════════════════════════════════════════════════════════════
# Text parsing helpers — labels are on left, values on right (newline separated)
# ════════════════════════════════════════════════════════════════════════════

def _grab_after_label(text: str, label: str) -> Optional[str]:
    """Find 'label' and return text immediately after (until next label/newline)."""
    if not text or not label:
        return None
    # Match "Label:value" or "Label\nvalue" or "Label\tvalue"
    pattern = re.compile(
        rf"{re.escape(label)}\s*[:\t]*\s*\n?\s*([^\n]+?)(?:\n|$)",
        re.IGNORECASE,
    )
    m = pattern.search(text)
    if m:
        return m.group(1).strip()
    return None


def _to_int(s) -> Optional[int]:
    if not s:
        return None
    cleaned = re.sub(r"[^\d-]", "", str(s))
    try:
        return int(cleaned) if cleaned else None
    except ValueError:
        return None


def _money_to_int(s) -> Optional[int]:
    if not s:
        return None
    cleaned = re.sub(r"[^\d.-]", "", str(s))
    if not cleaned or cleaned == "-":
        return None
    try:
        return int(float(cleaned))
    except ValueError:
        return None


# ════════════════════════════════════════════════════════════════════════════
# Public API
# ════════════════════════════════════════════════════════════════════════════

def fetch_miami_dade_pa_record(
    parcel_id: Optional[str] = None,
    address: Optional[str] = None,
    timeout_seconds: int = 45,
    listing_price: Optional[float] = None,
) -> dict:
    """Fetch and parse a Miami-Dade Property Appraiser record.

    The detail page is loaded in headless Chromium via Playwright, either
    through the folio deep link or by filling the address search form. The
    text of the ``pa-*`` components is then parsed into a flat dict.

    Args:
        parcel_id: folio number, with or without dashes
            (e.g. "31-2202-034-2470" or "3122020342470").
        address: alternative search by address; only used when no usable
            folio is given.
        timeout_seconds: default timeout for Playwright operations. The
            wait for the detail component is additionally capped at 20 s.
        listing_price: forwarded unchanged to
            ``data_fetchers.pa_duval._detect_renovation_pattern``.

    Returns:
        A dict that always holds ``county``, ``source``, ``fetched_at`` and
        ``errors``. On success it also holds the parsed property, sales,
        assessment, homestead and legal-description fields plus the raw
        section text. Failures are reported through ``errors`` (and
        ``_trace``) rather than raised.
    """
    result = {
        "county": "Miami-Dade",
        "source": "Miami-Dade Property Appraiser (apps.miamidadepa.gov)",
        "fetched_at": datetime.now(timezone.utc).isoformat(),
        "errors": [],
    }

    # Normalize the folio first (no dashes in the URL) so that a parcel_id
    # made only of dashes/whitespace is treated as missing.
    folio_clean = (parcel_id or "").replace("-", "").strip()
    if not folio_clean and not address:
        result["errors"].append("no parcel_id or address provided")
        return result

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        result["errors"].append("playwright not installed")
        return result

    # Honour a caller timeout shorter than the built-in render cap.
    render_timeout_ms = min(timeout_seconds * 1000, _RENDER_TIMEOUT_MS)

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                ctx = browser.new_context(user_agent=USER_AGENT)
                page = ctx.new_page()
                page.set_default_timeout(timeout_seconds * 1000)

                if folio_clean:
                    # Deep link by folio
                    page.goto(
                        f"{_BASE_URL}/#/?folio={folio_clean}",
                        wait_until="domcontentloaded",
                    )
                else:
                    # Search by address: landing page + fill the form.
                    page.goto(f"{_BASE_URL}/", wait_until="domcontentloaded")
                    time.sleep(5)
                    # Address tab is the default one.
                    addr_input = page.locator(
                        "kendo-textbox[formcontrolname='address'] input"
                    ).first
                    addr_input.fill(address or "")
                    page.locator("button[aria-label='Search button']").first.click()

                # Wait for the property info component to render.
                try:
                    page.wait_for_function(_JS_DETAIL_READY, timeout=render_timeout_ms)
                except Exception as e:
                    result["errors"].append(f"detail page didn't render: {e}")
                    return result

                time.sleep(2)
                result["source_url"] = page.url

                sections = page.evaluate(_JS_SECTION_TEXT)
                sales_rows = page.evaluate(_JS_SALES_ROWS)
                assessment_rows = page.evaluate(_JS_ASSESSMENT_ROWS)
                taxable_rows = page.evaluate(_JS_TAXABLE_TEXT)
            finally:
                # Close the browser on every path, including errors and the
                # early return above.
                browser.close()

        # ─── Post-process — parse via text labels ─────────────────────────
        result.update(
            _parse_property_info(sections.get("pa-propertyinformation", "") or "")
        )

        sales_history = _clean_sales_rows(sales_rows)
        result["sales_history"] = sales_history
        # First qualified sale of at least $1,000 in table order.
        # NOTE(review): assumes the site lists sales newest-first — confirm.
        result["most_recent_qualified_sale"] = next(
            (
                sale for sale in sales_history
                if sale["qualified"] == "Qualified" and (sale["price"] or 0) >= 1000
            ),
            None,
        )

        result.update(_summarize_assessments(assessment_rows))

        # Homestead detection from benefits section text
        benefits_text = sections.get("pa-benefitsinformation", "") or ""
        result["homestead_active"] = (
            "homestead" in benefits_text.lower() and "$" in benefits_text
        )

        # Legal description, without its heading, capped at 500 characters.
        legal_text = sections.get("pa-legaldescription", "") or ""
        result["legal_description"] = (
            re.sub(r"^Legal Description\s*\n", "", legal_text.strip())[:500]
            if legal_text else None
        )

        # Renovation signal
        from data_fetchers.pa_duval import _detect_renovation_pattern
        result["renovation_signal"] = _detect_renovation_pattern(
            result["sales_history"], listing_price=listing_price,
        )

        # Raw sections for advanced consumers
        result["_raw_sections"] = sections
        result["_raw_taxable_text"] = taxable_rows.get("_text", "")

    except Exception as e:
        # Top-level boundary: report the failure in the result instead of
        # raising, and keep whatever was parsed so far.
        import traceback
        result["errors"].append(f"{type(e).__name__}: {e}")
        result["_trace"] = traceback.format_exc()[:600]

    return result


# ── Private helpers for fetch_miami_dade_pa_record ──────────────────────────

# Upper bound, in milliseconds, on the wait for the detail component.
_RENDER_TIMEOUT_MS = 20_000

# Field labels parsed out of the pa-propertyinformation text (casefolded).
# Used to recognise when a "value" is really the next field's label.
_PROPERTY_LABELS = frozenset({
    "folio",
    "sub-division",
    "property address",
    "owner",
    "mailing address",
    "pa primary zone",
    "primary land use",
    "beds / baths /half",
    "floors",
    "living units",
    "living area",
    "adjusted area",
    "lot size",
    "year built",
})

# Truthy once the property-information component exists and mentions "Folio".
_JS_DETAIL_READY = (
    "() => document.querySelector('pa-propertyinformation') "
    "&& document.querySelector('pa-propertyinformation').innerText.includes('Folio')"
)

# Returns {tag: trimmed innerText} for each pa-component ('' when absent).
_JS_SECTION_TEXT = """
    () => {
        const out = {};
        const components = [
            'pa-propertyinformation','pa-salesinformation',
            'pa-assessmentinformation','pa-taxablevalueinformation',
            'pa-benefitsinformation','pa-legaldescription',
            'pa-additionalinformation',
        ];
        for (const tag of components) {
            const el = document.querySelector(tag);
            out[tag] = el ? (el.innerText || '').trim() : '';
        }
        return out;
    }
"""

# Returns one object per sales-table row that has at least four cells.
_JS_SALES_ROWS = """
    () => {
        const out = [];
        const sec = document.querySelector('pa-salesinformation');
        if (!sec) return out;
        const tbl = sec.querySelector('table');
        if (!tbl) return out;
        const rows = tbl.querySelectorAll('tr');
        for (let i = 1; i < rows.length; i++) {
            const cells = rows[i].querySelectorAll('td');
            if (cells.length < 4) continue;
            out.push({
                date: (cells[0]?.textContent || '').trim(),
                price: (cells[1]?.textContent || '').trim(),
                book_page: (cells[2]?.textContent || '').trim(),
                qualification: (cells[3]?.textContent || '').trim(),
                previous_owner: cells.length > 4 ? (cells[4]?.textContent || '').trim() : '',
            });
        }
        return out;
    }
"""

# Returns {row label: {year: value text}} from the first assessment table
# containing a header row whose first cell is "Year".
_JS_ASSESSMENT_ROWS = """
    () => {
        const out = {};
        const sec = document.querySelector('pa-assessmentinformation');
        if (!sec) return out;
        const tables = sec.querySelectorAll('table');
        if (tables.length === 0) return out;
        // Find header row in any table
        let years = [];
        let headerRowIdx = -1;
        let chosenTbl = null;
        for (const tbl of tables) {
            const rows = tbl.querySelectorAll('tr');
            for (let i = 0; i < rows.length; i++) {
                const firstCell = (rows[i].querySelector('th, td')?.textContent || '').trim().toLowerCase();
                if (firstCell === 'year') {
                    const headerCells = rows[i].querySelectorAll('th, td');
                    years = Array.from(headerCells).map(c => (c.textContent || '').trim()).slice(1);
                    headerRowIdx = i;
                    chosenTbl = tbl;
                    break;
                }
            }
            if (chosenTbl) break;
        }
        if (!chosenTbl || years.length === 0) return out;
        const rows = chosenTbl.querySelectorAll('tr');
        for (let i = headerRowIdx + 1; i < rows.length; i++) {
            const cells = rows[i].querySelectorAll('td, th');
            if (cells.length < 2) continue;
            const label = (cells[0]?.textContent || '').trim();
            const values = {};
            for (let j = 1; j < cells.length && j-1 < years.length; j++) {
                values[years[j-1]] = (cells[j].textContent || '').trim();
            }
            if (label) out[label] = values;
        }
        return out;
    }
"""

# Returns {_text: first 2000 chars of the taxable-value section} or {}.
_JS_TAXABLE_TEXT = """
    () => {
        const out = {};
        const sec = document.querySelector('pa-taxablevalueinformation');
        if (!sec) return out;
        out._text = (sec.innerText || '').trim().substring(0, 2000);
        return out;
    }
"""


def _is_property_label(line: str) -> bool:
    """Tell whether *line* is (or starts with) a known property field label."""
    head = re.split(r"[:\t]", line, maxsplit=1)[0]
    return head.strip().casefold() in _PROPERTY_LABELS


def _value_lines(block: str) -> list:
    """Return the stripped, non-blank lines of *block* up to the next label."""
    kept = []
    for raw_line in block.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if _is_property_label(line):
            break
        kept.append(line)
    return kept


def _cast_or_none(text: str, cast):
    """Apply *cast* (int or float) to *text*; None when empty or invalid."""
    try:
        return cast(text) if text else None
    except ValueError:
        return None


def _qualified_flag(qualification: str) -> str:
    """Map a qualification description to "Qualified" or "Unqualified"."""
    lowered = qualification.lower()
    if "qual" not in lowered:
        return "Unqualified"
    # "disqualified" and "unqualified" both contain "qual".
    if "disqual" in lowered or "unqual" in lowered:
        return "Unqualified"
    return "Qualified"


def _parse_property_info(prop_text: str) -> dict:
    """Parse the pa-propertyinformation text into result fields."""
    info = {
        "parcel_id": _grab_after_label(prop_text, "Folio"),
        "subdivision": _grab_after_label(prop_text, "Sub-Division"),
    }

    # Address: "Property Address\n{addr}"
    addr_match = re.search(
        r"Property Address\s*\n([^\n]+)", prop_text, re.IGNORECASE,
    )
    if addr_match:
        site_address = addr_match.group(1).strip()
        # A blank address would otherwise yield the next field's label.
        if not _is_property_label(site_address):
            info["site_address"] = site_address

    # Owner: "Owner\n{name(s)}" — at most two lines, cut at the next label.
    owner_match = re.search(
        r"Owner\s*\n([^\n]+(?:\n[^\n]+)?)", prop_text, re.IGNORECASE,
    )
    if owner_match:
        owners = _value_lines(owner_match.group(1))
        info["owner_name"] = owners[0] if owners else None
        info["co_owners"] = owners[1:]

    # Mailing address: up to three lines, cut at the next label.
    mailing_match = re.search(
        r"Mailing Address\s*\n((?:[^\n]+\n?){1,3})", prop_text, re.IGNORECASE,
    )
    if mailing_match:
        mailing_lines = _value_lines(mailing_match.group(1))
        if mailing_lines:
            info["mailing_address"] = re.sub(r"\s+", " ", " ".join(mailing_lines))

    info["pa_primary_zone"] = _grab_after_label(prop_text, "PA Primary Zone")
    info["use_code"] = _grab_after_label(prop_text, "Primary Land Use")
    info["use_description"] = info["use_code"]

    beds_baths = _grab_after_label(prop_text, "Beds / Baths /Half")
    if beds_baths:
        parts = [part.strip() for part in beds_baths.split("/")]
        parts += [""] * (3 - len(parts))  # pad so all three slots exist
        info["bedrooms"] = _cast_or_none(parts[0], int)
        info["baths"] = _cast_or_none(parts[1], float)
        info["half_baths"] = _cast_or_none(parts[2], int)

    info["floors"] = _to_int(_grab_after_label(prop_text, "Floors"))
    info["living_units"] = _to_int(_grab_after_label(prop_text, "Living Units"))
    info["sqft_heated"] = _to_int(_grab_after_label(prop_text, "Living Area"))
    info["sqft_total"] = _to_int(_grab_after_label(prop_text, "Adjusted Area"))
    info["lot_total_sqft"] = _to_int(_grab_after_label(prop_text, "Lot Size"))
    info["year_built"] = _to_int(_grab_after_label(prop_text, "Year Built"))
    return info


def _clean_sales_rows(sales_rows) -> list:
    """Turn scraped sales-table rows into cleaned sale records."""
    history = []
    for row in sales_rows:
        date_str = row.get("date", "")
        # Skip header rows / non-data
        if not date_str or "Sale" in date_str or date_str.lower() == "previous sale":
            continue
        qualification = row.get("qualification", "")
        history.append({
            "date": date_str,
            "price": _money_to_int(row.get("price", "")),
            "book_page": row.get("book_page", ""),
            "qualification": qualification,
            "previous_owner": row.get("previous_owner", ""),
            # Approximate Duval-compatible 'qualified' flag
            "qualified": _qualified_flag(qualification),
        })
    return history


def _summarize_assessments(assessment_rows: dict) -> dict:
    """Derive current/last-year market and assessed values from the table."""
    years = set()
    for row in assessment_rows.values():
        if isinstance(row, dict):
            years.update(year for year in row if year.isdigit())
    # Most recent year is "current", the one before it is "last".
    ordered = sorted(years, reverse=True)
    current_year = ordered[0] if ordered else None
    last_year = ordered[1] if len(ordered) > 1 else None

    def value_for(label, year):
        row = assessment_rows.get(label)
        if year and row:
            return _money_to_int(row.get(year, "0"))
        return None

    return {
        "assessment_table": assessment_rows,
        "just_value_current": value_for("Market Value", current_year),
        "assessed_value_current": value_for("Assessed Value", current_year),
        "just_value_last": value_for("Market Value", last_year),
        "assessed_value_last": value_for("Assessed Value", last_year),
        "tax_year_current": int(current_year) if current_year else None,
        "tax_year_last": int(last_year) if last_year else None,
    }


# ════════════════════════════════════════════════════════════════════════════
# CLI
# ════════════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    import argparse
    import json

    # Command-line entry point: fetch one record and print it as JSON.
    parser = argparse.ArgumentParser(description="Miami-Dade PA full record fetcher")
    parser.add_argument("--parcel", help="Folio number (e.g. '31-2202-034-2470')")
    parser.add_argument("--address", help="Alternative address search")
    parser.add_argument(
        "--listing-price",
        type=float,
        default=None,
        help="Listing price, forwarded to the fetcher as listing_price",
    )
    args = parser.parse_args()

    # At least one way of identifying the property is required.
    if not args.parcel and not args.address:
        parser.error("--parcel or --address required")

    rec = fetch_miami_dade_pa_record(
        parcel_id=args.parcel,
        address=args.address,
        listing_price=args.listing_price,
    )
    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(rec, indent=2, default=str))