"""Unit + smoke tests para Phase 3A — deals_db + DealClassifier. Tests: 1. deals_db CRUD: init, insert, dedup, list, update_classification, update_status 2. firecrawl tracking: record_usage, get_month_usage, alert levels 3. DealClassifier: precompute_heuristics, build_prompt, parse output 4. Smoke test: clasificar 4 deals reales (cada uno con expectativa clara) """ from __future__ import annotations import io, sys, time, os from pathlib import Path sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) def run_unit_tests(): """Test deals_db CRUD + Firecrawl tracking sin llamar Ollama.""" print("=" * 70) print("UNIT TESTS — deals_db + Firecrawl tracking") print("=" * 70) # Set DB path to a temp location for isolated testing test_db = ROOT / "data" / "deals_test.db" if test_db.exists(): test_db.unlink() os.environ.setdefault("DEALS_DB_PATH", str(test_db.relative_to(ROOT))) import deals_db # Override module-level _DB_PATH to point at test DB deals_db._DB_PATH = test_db # Reset thread-local connection if hasattr(deals_db._LOCAL, "conn"): deals_db._LOCAL.conn.close() del deals_db._LOCAL.conn from deals_db import ( init_db, insert_deal, get_deal_by_hash, get_deal_by_id, update_classification, update_status, list_deals, record_scraper_run, finish_scraper_run, list_recent_scraper_runs, record_firecrawl_usage, get_firecrawl_month_usage, firecrawl_alert_level, is_firecrawl_paused, firecrawl_budget_status, count_deals_by_status, compute_deal_hash, ) init_db() print("init_db OK") # Test 1: insert + dedup d1 = { "source": "miami_dade_clerk", "source_url": "https://example.com/case/12345", "address": "123 Main St, Miami, FL 33101", "city": "Miami", "state": "FL", "zip": "33101", "county": "Miami-Dade", "listing_price": 150000, "deal_type": "foreclosure", "starting_bid": 80000, "estimated_arv": 240000, "beds": 3, "baths": 2.0, "sqft": 1400, "year_built": 1985, "case_number": "2025-CA-001234", "auction_date": "2026-06-15", } id1, is_new = insert_deal(d1) assert is_new, f"first insert should be new, got is_new={is_new}" assert id1 > 0 print(f"INSERT 1: id={id1}, is_new={is_new} OK") # Test 2: re-insert same → should update, not insert id1b, is_new_b = insert_deal(d1) assert id1b == id1 assert not is_new_b print(f"INSERT 1 (re-insert): id={id1b} same as first, is_new={is_new_b} OK") # Test 3: different source → new row d2 = dict(d1) d2["source"] = "zillow" d2["source_url"] = "https://zillow.com/123" id2, is_new_2 = insert_deal(d2) assert is_new_2 assert id2 != id1 print(f"INSERT 2 (different source): id={id2} OK") # Test 4: hash function deterministic h1 = compute_deal_hash("miami_dade_clerk", "123 main st miami fl", 150000) h2 = compute_deal_hash("miami_dade_clerk", "123 Main St Miami FL", 150000) assert h1 == h2, "case-insensitive hash failed" print("compute_deal_hash case-insensitive OK") # Test 5: update_classification update_classification( deal_id=id1, status="potential_winner", score=85, reasons=["price_per_sqft $107 in Class C → 25% below market", "cap_rate_rough 8.5% above buy_hold threshold"], strategy="buy_hold", ) deal = get_deal_by_id(id1) assert deal["classification_status"] == "potential_winner" assert deal["classification_score"] == 85 assert deal["status"] == "classified", f"status should auto-flip to classified, got {deal['status']}" assert "Class C" in deal["classification_reasons"] print("update_classification OK (auto-flipped status new→classified)") # Test 6: list_deals filter winners = list_deals(classification="potential_winner") assert len(winners) == 1 assert winners[0]["id"] == id1 print(f"list_deals(classification=potential_winner): {len(winners)} deal OK") # Test 7: update_status update_status(id1, "interesting") deal = get_deal_by_id(id1) assert deal["status"] == "interesting" print("update_status OK") # Test 8: count_deals_by_status counts = count_deals_by_status() print(f"count_deals_by_status: {counts}") assert counts.get("interesting", 0) == 1 assert counts.get("new", 0) == 1 # Test 9: scraper runs run_id = record_scraper_run("miami_dade_clerk") assert run_id > 0 finish_scraper_run(run_id, deals_found=15, deals_new=3, deals_updated=12, errors_count=0, firecrawl_credits_used=0, status="success") runs = list_recent_scraper_runs(source="miami_dade_clerk") assert len(runs) == 1 assert runs[0]["status"] == "success" assert runs[0]["deals_new"] == 3 print(f"scraper_runs: id={run_id} deals_new={runs[0]['deals_new']} OK") # Test 10: firecrawl tracking record_firecrawl_usage(source="zillow_scraper", credits=5, url="https://...") record_firecrawl_usage(source="realtor_scraper", credits=8, url="https://...") total = get_firecrawl_month_usage() assert total == 13, f"expected 13, got {total}" print(f"firecrawl_month_usage: {total} credits OK") # Test 11: alert level level = firecrawl_alert_level() assert level == "ok", f"with 13 credits and budget 500, should be 'ok', got {level}" paused = is_firecrawl_paused() assert not paused print(f"firecrawl_alert_level: {level} OK, paused={paused}") # Test 12: simulate hitting 80% threshold record_firecrawl_usage(source="bulk_test", credits=400) level = firecrawl_alert_level() assert level == "warn", f"with 413/500 credits should be 'warn', got {level}" print(f"firecrawl alert at 82% usage: {level} OK") # Test 13: simulate hitting 95% pause record_firecrawl_usage(source="bulk_test", credits=65) level = firecrawl_alert_level() assert level == "pause", f"with 478/500 credits should be 'pause', got {level}" assert is_firecrawl_paused() print(f"firecrawl auto-pause at 95.6% usage: {level} OK") # Test 14: budget snapshot snap = firecrawl_budget_status() print(f"firecrawl_budget_status: {snap}") # Cleanup deals_db._LOCAL.conn.close() del deals_db._LOCAL.conn test_db.unlink() print() print("=== ALL UNIT TESTS PASSED ===") return 0 def run_classifier_smoke(): """Smoke test: clasificar 4 deals reales con expectativas.""" print() print("=" * 70) print("SMOKE TEST — DealClassifier con 4 deals reales") print("=" * 70) from deal_classifier import classify_deal test_cases = [ { "name": "Miami foreclosure $80K starting bid, ARV $240K", "expected_status": "potential_winner", "deal": { "source": "miami_dade_clerk", "deal_type": "foreclosure", "address": "789 NE 1st St, Miami, FL 33132", "city": "Miami", "county": "Miami-Dade", "state": "FL", "zip": "33132", "listing_price": 80000, "starting_bid": 80000, "estimated_arv": 240000, "beds": 3, "baths": 2.0, "sqft": 1400, "year_built": 1995, "case_number": "2025-CA-001234", "auction_date": "2026-06-15", }, }, { "name": "Miami MLS retail $450K Class B normal price", "expected_status": "maybe", # normal MLS dentro de market "deal": { "source": "zillow", "deal_type": "mls", "address": "100 Brickell Ave, Miami, FL 33131", "city": "Miami", "county": "Miami-Dade", "state": "FL", "zip": "33131", "listing_price": 450000, "beds": 3, "baths": 2.0, "sqft": 1800, "year_built": 2005, }, }, { "name": "Jacksonville $25K tax_deed 1967 build (red flag)", "expected_status": "red_flag", "deal": { "source": "duval_tax_collector", "deal_type": "tax_deed", "address": "456 W 21st St, Jacksonville, FL 32209", "city": "Jacksonville", "county": "Duval", "state": "FL", "zip": "32209", "listing_price": 25000, "starting_bid": 25000, "beds": 2, "baths": 1.0, "sqft": 900, "year_built": 1967, }, }, { "name": "Hialeah MLS $600K Class C overpriced", "expected_status": "pass", "deal": { "source": "realtor", "deal_type": "mls", "address": "1234 W 49th St, Hialeah, FL 33012", "city": "Hialeah", "county": "Miami-Dade", "state": "FL", "zip": "33012", "listing_price": 600000, "beds": 3, "baths": 2.0, "sqft": 1500, "year_built": 1970, }, }, ] results = [] for i, tc in enumerate(test_cases, 1): print(f"\n--- [{i}/{len(test_cases)}] {tc['name']} ---") print(f" Expected: {tc['expected_status']}") t0 = time.perf_counter() result = classify_deal(tc["deal"]) dur = time.perf_counter() - t0 actual = result["classification_status"] match = "✅" if actual == tc["expected_status"] else "⚠️" print(f" Actual: {actual} (score {result['score']})") print(f" Strategy: {result['strategy']}") print(f" Reasons:") for r in result["reasons"]: print(f" - {r}") print(f" Match: {match} | Duration: {dur:.1f}s | tokens: {result['_meta']['tokens']}") if result["_meta"].get("ollama_error"): print(f" ❌ Ollama error: {result['_meta']['ollama_error']}") if result["_meta"].get("parse_error"): print(f" ⚠️ Parse error: {result['_meta'].get('parse_error_detail')}") results.append((tc["name"], tc["expected_status"], actual, dur)) print() print("=" * 70) print("SUMMARY") print("=" * 70) matches = sum(1 for _, exp, act, _ in results if exp == act) avg_dur = sum(d for _, _, _, d in results) / len(results) print(f" Match rate: {matches}/{len(results)}") print(f" Avg duration: {avg_dur:.1f}s per deal") return 0 if __name__ == "__main__": rc1 = run_unit_tests() rc2 = run_classifier_smoke() sys.exit(rc1 or rc2)