feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
"""Full HUD pipeline: scrape FL → persist → classify."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
from pathlib import Path
|
||||
|
||||
# Force UTF-8 console output with undecodable characters replaced: the report
# printed by this script contains non-ASCII characters (e.g. an em dash).
# Reconfigure the existing stream in place instead of stacking a second
# TextIOWrapper on the same buffer: a fresh wrapper defaults to
# line_buffering=False, which delays progress lines on an interactive terminal.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    # Fallback for stream objects that expose a binary buffer but cannot be
    # reconfigured; keep line buffering so status output appears promptly.
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        encoding="utf-8",
        errors="replace",
        line_buffering=True,
    )
# Otherwise stdout was replaced by something with neither API; leave it alone.

# Directory two levels above this file, put first on sys.path so the imports
# performed inside main() are looked up there before anywhere else.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
||||
|
||||
|
||||
def _print_banner(title: str) -> None:
    """Print *title* framed above and below by a 70-character ``=`` rule."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def _print_summary(summary: dict) -> None:
    """Print every summary entry; abbreviate the ``errors`` entry.

    For ``errors`` only the item count and the first three items are shown.
    """
    for key, value in summary.items():
        # Only abbreviate when the value really is a sequence; anything else
        # (None, a bare count, ...) is printed like a normal entry instead of
        # crashing on len()/slicing.
        if key == "errors" and isinstance(value, (list, tuple)):
            print(f" {key}: ({len(value)} items)")
            for err in value[:3]:
                print(f" - {err}")
        else:
            print(f" {key}: {value}")


def _print_classification_breakdown(deals: list) -> None:
    """Print how many deals carry each ``classification_status``, sorted by status."""
    from collections import Counter

    counts = Counter(
        deal.get("classification_status") or "(unclassified)" for deal in deals
    )
    for status in sorted(counts):
        print(f" {status}: {counts[status]}")


def _print_top_deals(top: list) -> None:
    """Print a fixed-width table with one row per deal in *top*."""
    print(f"{'#':<3} {'Score':<6} {'Status':<20} {'Strategy':<14} {'Price':<10} {'Beds':<5} Address (county)")
    print("-" * 130)
    for rank, deal in enumerate(top, 1):
        score = deal.get("classification_score") or 0
        status = deal.get("classification_status") or "?"
        strategy = deal.get("classification_strategy") or "?"
        price = deal.get("listing_price")
        # A missing or zero price is shown as N/A.
        price_str = f"${price:,.0f}" if price else "N/A"
        beds = deal.get("beds")
        # Addresses are truncated to 70 characters.
        address = (deal.get("address") or "?")[:70]
        county = deal.get("county") or ""
        print(f"{rank:<3} {score:<6} {status:<20} {strategy:<14} {price_str:<10} {beds!s:<5} {address} ({county})")


def _print_sample_reasons(deals: list) -> None:
    """Print the classification reasons recorded for each deal in *deals*."""
    import json

    for idx, deal in enumerate(deals, 1):
        print(f"\n [{idx}] {deal.get('case_number')} — {deal.get('classification_status')} score {deal.get('classification_score')}")
        raw = deal.get("classification_reasons") or "[]"
        try:
            # Decode only textual payloads; an already-decoded value is used
            # as-is.
            reasons = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            # Report the problem instead of silently printing nothing.
            print(f" (reasons unavailable: {exc})")
            continue
        if not isinstance(reasons, list):
            reasons = [reasons]
        for reason in reasons:
            print(f" - {reason}")


def main() -> int:
    """Run the HUD Homestore pipeline for Florida and print a report.

    Initialises the deals database, runs the scraper for ``states=["FL"]``
    with ``auto_classify=True``, then prints the returned summary, a
    per-status count of stored HUD deals, the ten highest-scoring deals and
    the classification reasons of the top three.

    Returns:
        Always ``0``, intended as the process exit status.
    """
    _print_banner("HUD Homestore FULL PIPELINE (FL only)")

    # Project modules are imported inside the function rather than at module
    # top; presumably so the module-level sys.path tweak has already run —
    # TODO confirm.
    from scrapers.hud_homestore import run_scraper_to_db
    from deals_db import init_db, list_deals

    init_db()

    def log(m):
        # Flush so each status line shows up while the scraper is still running.
        print(f" {m}", flush=True)

    t0 = time.perf_counter()
    summary = run_scraper_to_db(states=["FL"], auto_classify=True, status_cb=log)
    elapsed = time.perf_counter() - t0

    print()
    _print_banner(f"PIPELINE SUMMARY (elapsed {elapsed:.0f}s = {elapsed/60:.1f} min)")
    _print_summary(summary)

    # Show classification breakdown for HUD source.
    # NOTE(review): only up to 200 deals are requested, so the breakdown and
    # the ranking below may not cover every stored HUD deal — confirm that
    # this cap is intended.
    print()
    print("--- HUD deals by classification ---")
    hud = list_deals(source="hud_homestore", limit=200)
    _print_classification_breakdown(hud)

    print()
    print("--- TOP 10 HUD deals by classification_score ---")
    # Deals without a score sort as 0.
    top = sorted(
        hud,
        key=lambda deal: deal.get("classification_score") or 0,
        reverse=True,
    )[:10]
    _print_top_deals(top)

    # Print 3 sample reasons
    print()
    print("--- Sample reasons (top 3) ---")
    _print_sample_reasons(top[:3])

    return 0
|
||||
|
||||
|
||||
# Script entry point: run the pipeline and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user