Files
AR-House/scripts/test_search_engine_e2e.py
T
2026-07-03 12:24:58 -04:00

85 lines
3.2 KiB
Python

"""E2E test of search_engine.py against real deals.db (125 deals from B1+B3)."""
from __future__ import annotations
import io, sys
from pathlib import Path
from collections import Counter
# Re-wrap the raw stdout byte stream as UTF-8 text so non-ASCII output (such as
# the check-mark printed at the end of main) is written as UTF-8 regardless of
# the stream's previous encoding; errors="replace" substitutes a marker for
# anything that cannot be encoded instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# Directory one level above the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Put ROOT first on the import path before the project imports that follow.
# NOTE(review): presumably search_engine and the scrapers package live directly
# under ROOT - confirm against the repository layout.
sys.path.insert(0, str(ROOT))
from search_engine import search_existing_only, preflight_check
from scrapers.registry import list_sources, get_sources_for_county
def _label(value: object, placeholder: str = "?") -> str:
    """Return ``str(value)``, or *placeholder* when *value* is None.

    Width/alignment format specs such as ``<12`` raise ``TypeError`` when
    applied directly to ``None``, so nullable fields are converted to text
    before being formatted into a fixed-width column.
    """
    return placeholder if value is None else str(value)


def _short_address(deal: dict) -> str:
    """Return the deal's address cut to 50 characters, or "?" when absent."""
    return (deal.get("address") or "?")[:50]


def _print_county_sources(county: str) -> None:
    """Print id, free flag and produced deal types of each source for *county*."""
    for src in get_sources_for_county(county):
        print(f" {src['id']}: free={src['free']}, deal_types={src['deal_types_produced']}")


def main() -> int:
    """Run the search-engine smoke scenarios and print their results.

    Seven scenarios are exercised in order: four ``search_existing_only``
    queries (county + sources, single source, price/beds filter,
    classification filter), one ``preflight_check`` call, and two
    ``get_sources_for_county`` lookups. Nothing is asserted; the output is
    meant to be read by a person.

    Returns:
        0 once every scenario has run. Any exception raised by a scenario
        propagates to the caller.
    """
    banner = "=" * 70
    print(banner)
    print("E2E TEST: Search Engine against real deals.db (125 deals)")
    print(banner)

    # Sim 1: one county restricted to two sources, no extra filters.
    print("\n--- Sim 1: Miami-Dade, both sources, no filters ---")
    deals = search_existing_only(
        counties=["Miami-Dade"], source_ids=["miami_dade_clerk", "hud_homestore"]
    )
    print(f" {len(deals)} deals matched")
    for deal in deals[:3]:
        addr = _short_address(deal)
        price = deal.get("listing_price") or 0
        # deal_type may be missing/None; _label keeps the column format from raising.
        deal_type = _label(deal.get("deal_type"))
        print(f" {deal.get('case_number')}: {deal_type:<12} "
              f"${price:>10,.0f} score={deal.get('classification_score')} {addr}")

    # Sim 2: HUD source only, no county filter.
    print("\n--- Sim 2: HUD only, no county filter (all FL counties) ---")
    deals = search_existing_only(source_ids=["hud_homestore"])
    print(f" {len(deals)} HUD deals total")
    per_county = Counter(deal.get("county") for deal in deals)
    print(" Top 10 counties:")
    for county, count in per_county.most_common(10):
        print(f" {county}: {count}")

    # Sim 3: price range combined with a minimum bedroom count.
    print("\n--- Sim 3: $300K-$400K, beds>=3 ---")
    deals = search_existing_only(
        filters={"min_price": 300000, "max_price": 400000, "beds_min": 3}
    )
    print(f" {len(deals)} deals in range")
    for deal in deals[:5]:
        addr = _short_address(deal)
        source = _label(deal.get("source"))
        deal_type = _label(deal.get("deal_type"))
        print(f" {source:<22} | {deal_type:<12} | "
              f"${deal.get('listing_price') or 0:>8,.0f} | {deal.get('beds')}bd | {addr}")

    # Sim 4: only deals classified as red_flag.
    print("\n--- Sim 4: classification=red_flag ---")
    deals = search_existing_only(filters={"classifications": ["red_flag"]})
    print(f" {len(deals)} red_flag deals")

    # Sim 5: credit/budget preflight for the two sources used above.
    print("\n--- Sim 5: preflight_check(both free sources) ---")
    report = preflight_check(["miami_dade_clerk", "hud_homestore"])
    print(f" total_credits: {report['total_credits_estimated']}")
    print(f" ok_to_run: {report['ok_to_run']}")
    budget = report["budget_snapshot"]
    print(f" budget used: {budget['credits_used']}/{budget['credits_budget']}")
    print(f" warnings: {report['warnings']}")

    # Sim 6 / Sim 7: registry lookups for two counties.
    print("\n--- Sim 6: get_sources_for_county('Miami-Dade') ---")
    _print_county_sources("Miami-Dade")
    print("\n--- Sim 7: get_sources_for_county('Broward') (no clerk yet) ---")
    _print_county_sources("Broward")

    print()
    print(banner)
    print("✅ Search engine works against real deals.db")
    print(banner)
    return 0
# Script entry point: run the scenarios and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())