feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
"""Full pipeline run: Miami-Dade Clerk scraper → deals.db → auto-classify.
|
||||
|
||||
Reports:
|
||||
- Total deals scraped (today + N days ahead)
|
||||
- Deals new / updated / errors
|
||||
- Classifications by status (potential_winner / maybe / pass / red_flag)
|
||||
- Sample of top 5 deals by classification_score
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
from pathlib import Path
|
||||
|
||||
# Force UTF-8 on stdout so the non-ASCII characters printed by the report
# (e.g. "—" and "✅") cannot raise UnicodeEncodeError on consoles that use a
# legacy code page; unencodable characters are replaced instead.
# reconfigure() updates the existing stream in place and keeps its current
# line-buffering setting, so progress lines still show up as they are printed.
# Building a new TextIOWrapper around sys.stdout.buffer would reset that
# setting and would fail outright when stdout has no ``buffer`` attribute.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# ROOT is the directory two levels above this file. It goes first on sys.path
# so the imports performed later inside main() can be resolved from it
# (presumably the project root — confirm against the repository layout).
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
||||
|
||||
|
||||
def main() -> int:
    """Run the scrape → persist → classify pipeline once and print a report.

    Calls ``run_scraper_to_db`` with ``days_ahead=14``, ``days_back=0`` and
    ``auto_classify=True``, then prints to stdout: the returned summary, the
    result of ``count_deals_by_status()``, the five highest-scored deals for
    the ``miami_dade_clerk`` source, and a tally of those deals by
    ``classification_status``.

    Returns:
        Always ``0``. Scraper errors are listed in the summary section but do
        not change the return value.
    """
    from collections import Counter

    source = "miami_dade_clerk"
    rule = "=" * 70

    print(rule)
    print("Miami-Dade Clerk FULL PIPELINE (scrape + persist + classify)")
    print(rule)

    # Project imports are deferred to call time so they run after the
    # module-level sys.path adjustment.
    from scrapers.miami_dade_clerk import run_scraper_to_db
    from deals_db import init_db, list_deals, count_deals_by_status

    init_db()

    started = time.perf_counter()
    summary = run_scraper_to_db(
        days_ahead=14,
        days_back=0,
        auto_classify=True,
        status_cb=_log_status,
        # max_dates not set → full 15 days
    )
    elapsed = time.perf_counter() - started

    print()
    print(rule)
    print(f"PIPELINE SUMMARY (elapsed {elapsed:.0f}s = {elapsed/60:.1f} min)")
    print(rule)
    _print_summary(summary)

    print()
    print("--- deals.db counts by status ---")
    for status, count in sorted(count_deals_by_status().items()):
        print(f" {status}: {count}")

    print()
    print("--- TOP 5 by classification_score ---")
    # NOTE(review): up to 200 rows are requested and only 5 are kept; limit=5
    # would presumably be enough if list_deals applies order_by before limit.
    top_deals = list_deals(
        classification=None,
        source=source,
        limit=200,
        order_by="classification_score DESC NULLS LAST",
    )[:5]
    _print_top_deals(top_deals)

    print()
    print("--- Classifications by status (Miami-Dade only) ---")
    # NOTE(review): at most 500 rows are requested, so this tally is
    # presumably truncated once the source holds more deals than that.
    tally = Counter(
        deal.get("classification_status") or "(unclassified)"
        for deal in list_deals(source=source, limit=500)
    )
    for status, count in sorted(tally.items()):
        print(f" {status}: {count}")

    print()
    # .get() keeps a missing "deals_new" key from raising KeyError after all
    # of the work above has already completed.
    print(
        f"✅ B1.4 COMPLETE — {summary.get('deals_new', 0)} "
        "new deals persisted + classified"
    )
    return 0


def _log_status(message: str) -> None:
    """Print one progress line; passed to the scraper as ``status_cb``."""
    print(f" {message}")


def _format_money(amount, missing: str) -> str:
    """Format *amount* as whole dollars, or return *missing* if it is falsy.

    A value of ``0`` is treated the same as ``None`` (both yield *missing*).
    """
    return f"${amount:,.0f}" if amount else missing


def _print_summary(summary) -> None:
    """Print each summary entry; "errors" shows a count and the first five."""
    for key, value in summary.items():
        if key != "errors":
            print(f" {key}: {value}")
            continue
        print(f" {key}: ({len(value)} items)")
        for err in value[:5]:
            print(f" - {err}")


def _print_top_deals(deals) -> None:
    """Print a numbered, multi-line description of every deal in *deals*.

    Each deal is read through ``.get``; ``classification_reasons`` may be a
    JSON-encoded list or an actual list, and anything else is shown as having
    no reasons.
    """
    import json  # imported once here instead of on every loop iteration

    for rank, deal in enumerate(deals, 1):
        raw_reasons = deal.get("classification_reasons", "[]")
        if isinstance(raw_reasons, list):
            # Already decoded — use it directly instead of discarding it.
            reasons = raw_reasons
        else:
            try:
                reasons = json.loads(raw_reasons) if raw_reasons else []
            except (TypeError, ValueError):
                # Malformed JSON (JSONDecodeError is a ValueError) or a
                # non-string value: show no reasons rather than abort.
                reasons = []
            if not isinstance(reasons, list):
                # Valid JSON that is not a list (e.g. an object) cannot be
                # sliced below.
                reasons = []

        address = (deal.get("address") or "(no address)")[:60]
        bid = _format_money(deal.get("starting_bid"), "Hidden/None")
        arv = _format_money(deal.get("estimated_arv"), "N/A")
        judgment = _format_money(deal.get("final_judgment_amount"), "N/A")

        print(
            f"\n [{rank}] score={deal.get('classification_score')} "
            f"status={deal.get('classification_status')} "
            f"strategy={deal.get('classification_strategy')}"
        )
        print(f" Case: {deal.get('case_number')} | Type: {deal.get('deal_type')}")
        print(f" Address: {address}")
        print(f" Starting bid: {bid} | Assessed: {arv} | Final Judgment: {judgment}")
        print(" Reasons:")
        for reason in reasons[:4]:
            print(f" - {reason}")
|
||||
|
||||
|
||||
# Script entry point: run the pipeline and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user