"""E2E test of search_engine.py against real deals.db (125 deals from B1+B3).""" from __future__ import annotations import io, sys from pathlib import Path from collections import Counter sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) from search_engine import search_existing_only, preflight_check from scrapers.registry import list_sources, get_sources_for_county def main() -> int: print("=" * 70) print("E2E TEST: Search Engine against real deals.db (125 deals)") print("=" * 70) # Sim 1 print("\n--- Sim 1: Miami-Dade, both sources, no filters ---") r = search_existing_only( counties=["Miami-Dade"], source_ids=["miami_dade_clerk", "hud_homestore"] ) print(f" {len(r)} deals matched") for d in r[:3]: addr = (d.get("address") or "?")[:50] price = d.get("listing_price") or 0 print(f" {d.get('case_number')}: {d.get('deal_type'):<12} " f"${price:>10,.0f} score={d.get('classification_score')} {addr}") # Sim 2: HUD nationwide print("\n--- Sim 2: HUD only, no county filter (all FL counties) ---") r = search_existing_only(source_ids=["hud_homestore"]) print(f" {len(r)} HUD deals total") counties = Counter(d.get("county") for d in r) print(f" Top 10 counties:") for c, n in counties.most_common(10): print(f" {c}: {n}") # Sim 3: Price + beds filter print("\n--- Sim 3: $300K-$400K, beds>=3 ---") r = search_existing_only( filters={"min_price": 300000, "max_price": 400000, "beds_min": 3} ) print(f" {len(r)} deals in range") for d in r[:5]: addr = (d.get("address") or "?")[:50] print(f" {d.get('source'):<22} | {d.get('deal_type'):<12} | " f"${d.get('listing_price') or 0:>8,.0f} | {d.get('beds')}bd | {addr}") # Sim 4: only red_flag classification print("\n--- Sim 4: classification=red_flag ---") r = search_existing_only(filters={"classifications": ["red_flag"]}) print(f" {len(r)} red_flag deals") # Sim 5: preflight check print("\n--- Sim 5: preflight_check(both free sources) ---") pf = preflight_check(["miami_dade_clerk", "hud_homestore"]) print(f" total_credits: {pf['total_credits_estimated']}") print(f" ok_to_run: {pf['ok_to_run']}") print(f" budget used: {pf['budget_snapshot']['credits_used']}/{pf['budget_snapshot']['credits_budget']}") print(f" warnings: {pf['warnings']}") # Sim 6: get_sources_for_county print("\n--- Sim 6: get_sources_for_county('Miami-Dade') ---") sources = get_sources_for_county("Miami-Dade") for s in sources: print(f" {s['id']}: free={s['free']}, deal_types={s['deal_types_produced']}") print("\n--- Sim 7: get_sources_for_county('Broward') (no clerk yet) ---") sources = get_sources_for_county("Broward") for s in sources: print(f" {s['id']}: free={s['free']}, deal_types={s['deal_types_produced']}") print() print("=" * 70) print("✅ Search engine works against real deals.db") print("=" * 70) return 0 if __name__ == "__main__": sys.exit(main())