AR-House/scripts/test_price_validator_post_fix.py

"""Verify price_validator detection logic survives Firecrawl SDK API rename.

Tests:
1. Heuristic path (no Firecrawl needed): suspicious_low_listing detection
2. With existing comps (no Firecrawl needed): CRITICAL_RED_FLAG detection
3. With tax_assessed (no Firecrawl needed): WARNING / NORMAL detection
4. Confirm: module imports cleanly (no syntax errors from rename)
"""
from __future__ import annotations
import io, sys
from pathlib import Path

sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

import ast
# Parse-test the fixed files
for f in ["data_fetchers/price_validator.py", "data_fetchers/property_value.py"]:
    ast.parse(open(f, encoding="utf-8").read())
print(f"✅ Both files parse OK")

import data_fetchers  # load .env
from data_fetchers.price_validator import validate_price


def main() -> int:
    print()
    print("=" * 70)
    print("PRICE VALIDATOR — post Firecrawl SDK API rename")
    print("=" * 70)

    failures = 0

    # ─── Test 1: Heuristic path — Jacksonville $70K listing ────────────────
    print("\n--- Test 1: Jacksonville $70K (heuristic path, no Firecrawl) ---")
    r = validate_price(
        address="3245 N Pearl St, Jacksonville, FL 32209",
        listing_price=70_000,
        tax_assessed_value=None,
        existing_comps_estimate=None,
        existing_comps_confidence=None,
        existing_comps_sources=None,
        neighborhood_class=None,
        use_firecrawl=False,  # explicit: don't call Firecrawl
    )
    print(f"  status: {r['status']}")
    print(f"  suspicious_low_listing: {r.get('suspicious_low_listing')}")
    print(f"  possible_reasons count: {len(r.get('possible_reasons') or [])}")
    if r.get("possible_reasons"):
        print(f"  first reason: {r['possible_reasons'][0][:90]}")
    if r["status"] == "UNKNOWN" and r.get("suspicious_low_listing"):
        print(f"  ✅ Detected suspicious low listing (FORECLOSURE hypothesis)")
    else:
        print(f"  ❌ Expected UNKNOWN + suspicious_low_listing=True")
        failures += 1

    # ─── Test 2: With existing high-confidence comps ($70K vs $280K mid) ───
    print("\n--- Test 2: Jacksonville $70K vs $280K comps mid (CRITICAL_RED_FLAG expected) ---")
    r = validate_price(
        address="3245 N Pearl St, Jacksonville, FL 32209",
        listing_price=70_000,
        existing_comps_estimate=280_000,
        existing_comps_confidence="medium",  # not 'low' — should be accepted
        existing_comps_sources=["Comps Firecrawl (Jacksonville)"],
        neighborhood_class="C",
        use_firecrawl=False,
    )
    print(f"  status: {r['status']}")
    print(f"  signed_max_discrepancy_pct: {r.get('signed_max_discrepancy_pct')}")
    print(f"  possible_reasons: {len(r.get('possible_reasons') or [])}")
    if r["status"] == "CRITICAL_RED_FLAG" and r.get("signed_max_discrepancy_pct", 0) < 0:
        print(f"  ✅ CRITICAL_RED_FLAG fired (listing {r['signed_max_discrepancy_pct']}% below market)")
    else:
        print(f"  ❌ Expected CRITICAL_RED_FLAG with negative discrepancy")
        failures += 1

    # ─── Test 3: Low-confidence comps should be REJECTED ───────────────────
    print("\n--- Test 3: low-confidence comps (heuristic-only) should be rejected ---")
    r = validate_price(
        address="3245 N Pearl St, Jacksonville, FL 32209",
        listing_price=70_000,
        existing_comps_estimate=37_000,
        existing_comps_confidence="low",  # → should reject
        existing_comps_sources=["Deductions por edad (heuristica FL)"],
        neighborhood_class="D",
        use_firecrawl=False,
    )
    print(f"  status: {r['status']}")
    print(f"  rejected_sources: {len(r.get('rejected_sources') or [])}")
    if r["status"] == "UNKNOWN" and r.get("rejected_sources"):
        print(f"  ✅ Low-confidence comps correctly rejected, fallback to UNKNOWN+suspicious")
    else:
        print(f"  ❌ Expected UNKNOWN with rejected_sources")
        failures += 1

    # ─── Test 4: NORMAL case (listing in line with comps) ───────────────────
    print("\n--- Test 4: $275K listing with $280K comps (NORMAL expected) ---")
    r = validate_price(
        address="100 Main St, Tampa, FL 33602",
        listing_price=275_000,
        existing_comps_estimate=280_000,
        existing_comps_confidence="medium",
        existing_comps_sources=["Comps Firecrawl"],
        neighborhood_class="B",
        use_firecrawl=False,
    )
    print(f"  status: {r['status']}")
    print(f"  signed_max_discrepancy_pct: {r.get('signed_max_discrepancy_pct')}")
    if r["status"] == "NORMAL":
        print(f"  ✅ NORMAL fired (within ±10%)")
    else:
        print(f"  ❌ Expected NORMAL")
        failures += 1

    # ─── Test 5: Confirm Firecrawl-disabled path still returns errors gracefully ─
    print("\n--- Test 5: ENABLE_FIRECRAWL_PRICE_CHECK=false → should NOT crash ---")
    import os
    os.environ.pop("ENABLE_FIRECRAWL_PRICE_CHECK", None)
    from data_fetchers.price_validator import fetch_zillow_zestimate, fetch_redfin_estimate
    z, errz = fetch_zillow_zestimate("123 Test St, Miami, FL")
    print(f"  Zillow: result={z}, errors={errz[0][:80] if errz else None}")
    rfn, errrfn = fetch_redfin_estimate("123 Test St, Miami, FL")
    print(f"  Redfin: result={rfn}, errors={errrfn[0][:80] if errrfn else None}")
    if z is None and rfn is None:
        print(f"  ✅ Both return None gracefully when flag is off (no crash)")
    else:
        print(f"  ❌ Expected None for both")
        failures += 1

    print()
    print("=" * 70)
    if failures == 0:
        print("✅ ALL 5 TESTS PASSED — detection logic intact after API rename")
    else:
        print(f"❌ {failures} test(s) failed")
    return failures


if __name__ == "__main__":
    sys.exit(main())