83 lines
2.7 KiB
Python
83 lines
2.7 KiB
Python
"""Full HUD pipeline: scrape FL → persist → classify."""
|
|
from __future__ import annotations
|
|
import io, sys, time
|
|
from pathlib import Path
|
|
|
|
# Force UTF-8 output with replacement of unencodable characters so the report
# (which prints characters such as "—") cannot raise UnicodeEncodeError on
# consoles with a legacy code page.
#
# reconfigure() changes the existing stream in place and keeps its current
# line-buffering setting. Wrapping sys.stdout.buffer in a brand-new
# io.TextIOWrapper would create a stream that is not line-buffered (so
# progress output shows up late) and would fail outright when stdout has no
# .buffer attribute.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except (AttributeError, ValueError, OSError):
    # Best effort only: stdout is None or has been replaced by an object that
    # cannot be reconfigured. Keep whatever stream is installed.
    pass

# Directory two levels above this file; put first on sys.path so the
# project-level imports performed later resolve against it.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
def _banner(title: str) -> None:
    """Print *title* framed by two 70-character ``=`` rules."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def _print_summary(summary, max_errors: int = 3) -> None:
    """Print one line per summary entry.

    The ``errors`` entry is shown as an item count followed by at most
    *max_errors* entries; a trailing line reports how many were left out.
    A missing (``None``) summary prints nothing.
    """
    for key, value in (summary or {}).items():
        if key != "errors":
            print(f" {key}: {value}")
            continue
        errors = list(value or [])
        print(f" {key}: ({len(errors)} items)")
        for err in errors[:max_errors]:
            print(f" - {err}")
        hidden = len(errors) - max_errors
        if hidden > 0:
            print(f" ... and {hidden} more")


def _print_class_breakdown(deals) -> None:
    """Print how many deals carry each ``classification_status`` value."""
    from collections import Counter

    counts = Counter(
        deal.get("classification_status") or "(unclassified)" for deal in deals
    )
    for status in sorted(counts):
        print(f" {status}: {counts[status]}")


def _format_price(price) -> str:
    """Format *price* as ``$1,234``; ``N/A`` when falsy, ``str(price)`` if not numeric."""
    if not price:
        return "N/A"
    try:
        return f"${price:,.0f}"
    except (TypeError, ValueError):
        # Non-numeric value (e.g. a string): show it rather than crash.
        return str(price)


def _print_top_table(top) -> None:
    """Print the ranked table of deals, one row per entry of *top*."""
    print(f"{'#':<3} {'Score':<6} {'Status':<20} {'Strategy':<14} {'Price':<10} {'Beds':<5} Address (county)")
    print("-" * 130)
    for rank, deal in enumerate(top, 1):
        score = deal.get("classification_score") or 0
        status = deal.get("classification_status") or "?"
        strategy = deal.get("classification_strategy") or "?"
        price_str = _format_price(deal.get("listing_price"))
        beds = deal.get("beds")
        address = (deal.get("address") or "?")[:70]
        county = deal.get("county") or ""
        print(f"{rank:<3} {score:<6} {status:<20} {strategy:<14} {price_str:<10} {beds!s:<5} {address} ({county})")


def _load_reasons(raw):
    """Return the classification reasons as a list, or ``None`` if undecodable.

    *raw* may be empty, an already-decoded list/tuple, or a JSON document.
    A JSON value that is not an array is wrapped in a one-element list;
    JSON ``null`` yields an empty list.
    """
    import json

    if not raw:
        return []
    if isinstance(raw, (list, tuple)):
        return list(raw)
    try:
        decoded = json.loads(raw)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError
        return None
    if decoded is None:
        return []
    if isinstance(decoded, list):
        return decoded
    return [decoded]


def _print_sample_reasons(top, count: int = 3) -> None:
    """Print the classification reasons of the first *count* deals in *top*."""
    for rank, deal in enumerate(top[:count], 1):
        print(f"\n [{rank}] {deal.get('case_number')} — {deal.get('classification_status')} score {deal.get('classification_score')}")
        reasons = _load_reasons(deal.get("classification_reasons"))
        if reasons is None:
            # Say so explicitly instead of silently printing nothing.
            print(" - (classification_reasons is not valid JSON)")
            continue
        for reason in reasons:
            print(f" - {reason}")


def main() -> int:
    """Run the FL HUD pipeline and print a report to stdout.

    Steps: initialise the database, run the scraper for ``["FL"]`` with
    ``auto_classify=True``, print the returned summary with the elapsed
    time, then print a per-status count, a top-10 table ordered by
    ``classification_score`` and the reasons of the top three deals.

    Returns:
        Always ``0``; it is used as the process exit status.
    """
    _banner("HUD Homestore FULL PIPELINE (FL only)")

    # Project modules are imported at call time, i.e. after the module-level
    # sys.path setup has run.
    from scrapers.hud_homestore import run_scraper_to_db
    from deals_db import init_db, list_deals

    init_db()

    def log(message):
        # Flush so progress shows up live even when stdout is block-buffered
        # (piped or re-wrapped); the scrape can run for minutes.
        print(f" {message}", flush=True)

    started = time.perf_counter()
    summary = run_scraper_to_db(states=["FL"], auto_classify=True, status_cb=log)
    elapsed = time.perf_counter() - started

    print()
    _banner(f"PIPELINE SUMMARY (elapsed {elapsed:.0f}s = {elapsed/60:.1f} min)")
    _print_summary(summary)

    # Show classification breakdown for HUD source.
    # NOTE(review): both the breakdown and the top-10 only see what
    # list_deals returns for limit=200 — presumably a row cap; confirm it is
    # large enough for the full FL result set.
    print()
    print("--- HUD deals by classification ---")
    hud = list_deals(source="hud_homestore", limit=200) or []
    _print_class_breakdown(hud)

    print()
    print("--- TOP 10 HUD deals by classification_score ---")
    # Missing/None scores sort as 0; sorted() is stable, so ties keep the
    # order in which list_deals returned them.
    top = sorted(hud, key=lambda d: d.get("classification_score") or 0, reverse=True)[:10]
    _print_top_table(top)

    # Print 3 sample reasons
    print()
    print("--- Sample reasons (top 3) ---")
    _print_sample_reasons(top)

    return 0
|
|
|
|
|
|
# Script entry point: run the pipeline and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|