# (file-viewer header residue: 273 lines, 10 KiB, Python)
"""Unit + smoke tests for Phase 3A — deals_db + DealClassifier.

Tests:
  1. deals_db CRUD: init, insert, dedup, list, update_classification, update_status
  2. Firecrawl tracking: record_usage, get_month_usage, alert levels
  3. DealClassifier: precompute_heuristics, build_prompt, parse output
  4. Smoke test: classify 4 real deals (each with a clear expected outcome)
"""
|
|
from __future__ import annotations
|
|
import io, sys, time, os
|
|
from pathlib import Path
|
|
|
|
# Force UTF-8 output so the non-ASCII characters printed by this script
# (em dashes, arrows, emoji) never raise UnicodeEncodeError on consoles with a
# legacy code page; unencodable characters are replaced instead.
# Prefer reconfiguring the existing stream in place: that keeps its buffering
# mode (so progress lines still show up promptly on a terminal) and also works
# for stdout replacements that have no ``.buffer``. Fall back to re-wrapping the
# raw buffer only when ``reconfigure`` is unavailable.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Directory two levels above this file; put it first on sys.path so the
# function-level imports below (deals_db, deal_classifier) resolve from there.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
def _drop_cached_connection(db_module):
    """Close and forget the connection cached on ``db_module._LOCAL``, if any.

    Safe to call when no connection has been opened yet, which lets the same
    helper serve both the pre-test reset and the post-test cleanup.
    """
    conn = getattr(db_module._LOCAL, "conn", None)
    if conn is not None:
        conn.close()
    if hasattr(db_module._LOCAL, "conn"):
        del db_module._LOCAL.conn


def run_unit_tests():
    """Test deals_db CRUD + Firecrawl tracking without calling Ollama.

    Runs against a throwaway database file at ``ROOT/data/deals_test.db``.
    The file is removed before the run and again afterwards; the cached
    connection is closed in a ``finally`` clause so a failing assertion does
    not leave the connection open or the test database behind.

    Returns:
        0 when every check passes.

    Raises:
        AssertionError: as soon as any individual check fails.
    """
    print("=" * 70)
    print("UNIT TESTS — deals_db + Firecrawl tracking")
    print("=" * 70)

    # Set DB path to a temp location for isolated testing
    test_db = ROOT / "data" / "deals_test.db"
    if test_db.exists():
        test_db.unlink()
    # setdefault: an already-exported DEALS_DB_PATH is left untouched; the
    # module-level override below is what actually redirects deals_db.
    os.environ.setdefault("DEALS_DB_PATH", str(test_db.relative_to(ROOT)))

    import deals_db

    # Override module-level _DB_PATH to point at test DB
    deals_db._DB_PATH = test_db
    # Reset any connection cached before the path override
    _drop_cached_connection(deals_db)

    # get_deal_by_hash is not called below; importing it still verifies that
    # the name is exported by deals_db.
    from deals_db import (
        init_db, insert_deal, get_deal_by_hash, get_deal_by_id,
        update_classification, update_status, list_deals,
        record_scraper_run, finish_scraper_run, list_recent_scraper_runs,
        record_firecrawl_usage, get_firecrawl_month_usage,
        firecrawl_alert_level, is_firecrawl_paused, firecrawl_budget_status,
        count_deals_by_status, compute_deal_hash,
    )

    try:
        init_db()
        print("init_db OK")

        # Test 1: insert + dedup
        d1 = {
            "source": "miami_dade_clerk",
            "source_url": "https://example.com/case/12345",
            "address": "123 Main St, Miami, FL 33101",
            "city": "Miami", "state": "FL", "zip": "33101", "county": "Miami-Dade",
            "listing_price": 150000,
            "deal_type": "foreclosure",
            "starting_bid": 80000,
            "estimated_arv": 240000,
            "beds": 3, "baths": 2.0, "sqft": 1400, "year_built": 1985,
            "case_number": "2025-CA-001234",
            "auction_date": "2026-06-15",
        }
        id1, is_new = insert_deal(d1)
        assert is_new, f"first insert should be new, got is_new={is_new}"
        assert id1 > 0
        print(f"INSERT 1: id={id1}, is_new={is_new} OK")

        # Test 2: re-insert same → should update, not insert
        id1b, is_new_b = insert_deal(d1)
        assert id1b == id1
        assert not is_new_b
        print(f"INSERT 1 (re-insert): id={id1b} same as first, is_new={is_new_b} OK")

        # Test 3: different source → new row
        d2 = dict(d1)
        d2["source"] = "zillow"
        d2["source_url"] = "https://zillow.com/123"
        id2, is_new_2 = insert_deal(d2)
        assert is_new_2
        assert id2 != id1
        print(f"INSERT 2 (different source): id={id2} OK")

        # Test 4: hash function deterministic
        h1 = compute_deal_hash("miami_dade_clerk", "123 main st miami fl", 150000)
        h2 = compute_deal_hash("miami_dade_clerk", "123 Main St Miami FL", 150000)
        assert h1 == h2, "case-insensitive hash failed"
        print("compute_deal_hash case-insensitive OK")

        # Test 5: update_classification
        update_classification(
            deal_id=id1,
            status="potential_winner",
            score=85,
            reasons=["price_per_sqft $107 in Class C → 25% below market",
                     "cap_rate_rough 8.5% above buy_hold threshold"],
            strategy="buy_hold",
        )
        deal = get_deal_by_id(id1)
        assert deal["classification_status"] == "potential_winner"
        assert deal["classification_score"] == 85
        assert deal["status"] == "classified", f"status should auto-flip to classified, got {deal['status']}"
        assert "Class C" in deal["classification_reasons"]
        print("update_classification OK (auto-flipped status new→classified)")

        # Test 6: list_deals filter
        winners = list_deals(classification="potential_winner")
        assert len(winners) == 1
        assert winners[0]["id"] == id1
        print(f"list_deals(classification=potential_winner): {len(winners)} deal OK")

        # Test 7: update_status
        update_status(id1, "interesting")
        deal = get_deal_by_id(id1)
        assert deal["status"] == "interesting"
        print("update_status OK")

        # Test 8: count_deals_by_status
        # id1 was moved to "interesting" above; id2 was never classified.
        counts = count_deals_by_status()
        print(f"count_deals_by_status: {counts}")
        assert counts.get("interesting", 0) == 1
        assert counts.get("new", 0) == 1

        # Test 9: scraper runs
        run_id = record_scraper_run("miami_dade_clerk")
        assert run_id > 0
        finish_scraper_run(run_id, deals_found=15, deals_new=3, deals_updated=12,
                           errors_count=0, firecrawl_credits_used=0, status="success")
        runs = list_recent_scraper_runs(source="miami_dade_clerk")
        assert len(runs) == 1
        assert runs[0]["status"] == "success"
        assert runs[0]["deals_new"] == 3
        print(f"scraper_runs: id={run_id} deals_new={runs[0]['deals_new']} OK")

        # Test 10: firecrawl tracking
        record_firecrawl_usage(source="zillow_scraper", credits=5, url="https://...")
        record_firecrawl_usage(source="realtor_scraper", credits=8, url="https://...")
        total = get_firecrawl_month_usage()
        assert total == 13, f"expected 13, got {total}"
        print(f"firecrawl_month_usage: {total} credits OK")

        # Test 11: alert level
        # NOTE(review): the thresholds below assume a 500-credit monthly budget
        # (per the assertion messages) — confirm against deals_db configuration.
        level = firecrawl_alert_level()
        assert level == "ok", f"with 13 credits and budget 500, should be 'ok', got {level}"
        paused = is_firecrawl_paused()
        assert not paused
        print(f"firecrawl_alert_level: {level} OK, paused={paused}")

        # Test 12: simulate hitting 80% threshold
        record_firecrawl_usage(source="bulk_test", credits=400)
        level = firecrawl_alert_level()
        assert level == "warn", f"with 413/500 credits should be 'warn', got {level}"
        print(f"firecrawl alert at 82% usage: {level} OK")

        # Test 13: simulate hitting 95% pause
        record_firecrawl_usage(source="bulk_test", credits=65)
        level = firecrawl_alert_level()
        assert level == "pause", f"with 478/500 credits should be 'pause', got {level}"
        assert is_firecrawl_paused()
        print(f"firecrawl auto-pause at 95.6% usage: {level} OK")

        # Test 14: budget snapshot
        snap = firecrawl_budget_status()
        print(f"firecrawl_budget_status: {snap}")
    finally:
        # Cleanup runs on failure too: close the connection before deleting
        # the file, since an open handle can block the unlink on some platforms.
        _drop_cached_connection(deals_db)
        if test_db.exists():
            test_db.unlink()

    print()
    print("=== ALL UNIT TESTS PASSED ===")
    return 0
|
|
|
|
|
|
def run_classifier_smoke():
    """Smoke test: classify 4 real deals and compare against expectations.

    Each deal is passed to ``deal_classifier.classify_deal``; the returned
    status, score, strategy and reasons are printed next to the expected
    status, followed by a match-rate / average-duration summary. A mismatch
    is only reported, never raised.

    Returns:
        Always 0.
    """
    banner = "=" * 70
    print()
    print(banner)
    print("SMOKE TEST — DealClassifier con 4 deals reales")
    print(banner)

    from deal_classifier import classify_deal

    # (label, expected classification_status, deal payload)
    cases = [
        (
            "Miami foreclosure $80K starting bid, ARV $240K",
            "potential_winner",
            {
                "source": "miami_dade_clerk",
                "deal_type": "foreclosure",
                "address": "789 NE 1st St, Miami, FL 33132",
                "city": "Miami", "county": "Miami-Dade", "state": "FL", "zip": "33132",
                "listing_price": 80000, "starting_bid": 80000, "estimated_arv": 240000,
                "beds": 3, "baths": 2.0, "sqft": 1400, "year_built": 1995,
                "case_number": "2025-CA-001234",
                "auction_date": "2026-06-15",
            },
        ),
        (
            "Miami MLS retail $450K Class B normal price",
            "maybe",  # ordinary MLS listing priced within market range
            {
                "source": "zillow",
                "deal_type": "mls",
                "address": "100 Brickell Ave, Miami, FL 33131",
                "city": "Miami", "county": "Miami-Dade", "state": "FL", "zip": "33131",
                "listing_price": 450000,
                "beds": 3, "baths": 2.0, "sqft": 1800, "year_built": 2005,
            },
        ),
        (
            "Jacksonville $25K tax_deed 1967 build (red flag)",
            "red_flag",
            {
                "source": "duval_tax_collector",
                "deal_type": "tax_deed",
                "address": "456 W 21st St, Jacksonville, FL 32209",
                "city": "Jacksonville", "county": "Duval", "state": "FL", "zip": "32209",
                "listing_price": 25000, "starting_bid": 25000,
                "beds": 2, "baths": 1.0, "sqft": 900, "year_built": 1967,
            },
        ),
        (
            "Hialeah MLS $600K Class C overpriced",
            "pass",
            {
                "source": "realtor",
                "deal_type": "mls",
                "address": "1234 W 49th St, Hialeah, FL 33012",
                "city": "Hialeah", "county": "Miami-Dade", "state": "FL", "zip": "33012",
                "listing_price": 600000,
                "beds": 3, "baths": 2.0, "sqft": 1500, "year_built": 1970,
            },
        ),
    ]

    total = len(cases)
    outcomes = []  # (label, expected, actual, seconds) per classified deal
    for idx, (label, expected, deal) in enumerate(cases, start=1):
        print(f"\n--- [{idx}/{total}] {label} ---")
        print(f" Expected: {expected}")

        # Time only the classifier call itself.
        started = time.perf_counter()
        result = classify_deal(deal)
        elapsed = time.perf_counter() - started

        actual = result["classification_status"]
        if actual == expected:
            marker = "✅"
        else:
            marker = "⚠️"

        print(f" Actual: {actual} (score {result['score']})")
        print(f" Strategy: {result['strategy']}")
        print(" Reasons:")
        for reason in result["reasons"]:
            print(f" - {reason}")

        meta = result["_meta"]
        print(f" Match: {marker} | Duration: {elapsed:.1f}s | tokens: {meta['tokens']}")
        if meta.get("ollama_error"):
            print(f" ❌ Ollama error: {meta['ollama_error']}")
        if meta.get("parse_error"):
            print(f" ⚠️ Parse error: {meta.get('parse_error_detail')}")

        outcomes.append((label, expected, actual, elapsed))

    print()
    print(banner)
    print("SUMMARY")
    print(banner)
    hits = sum(expected == actual for _, expected, actual, _ in outcomes)
    mean_seconds = sum(outcome[3] for outcome in outcomes) / len(outcomes)
    print(f" Match rate: {hits}/{len(outcomes)}")
    print(f" Avg duration: {mean_seconds:.1f}s per deal")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the unit tests first, then the classifier smoke test. The process
    # exits with the first non-zero return code, or 0 when both return 0.
    unit_rc = run_unit_tests()
    smoke_rc = run_classifier_smoke()
    sys.exit(unit_rc or smoke_rc)
|