86 lines
3.8 KiB
Python
86 lines
3.8 KiB
Python
"""Test scraper Miami-Dade Clerk con 3 dias para verificar parsing antes del full run."""
|
|
from __future__ import annotations
|
|
import io, sys, time
|
|
from pathlib import Path
|
|
|
|
# Emit UTF-8 regardless of the console's default encoding so the status glyphs
# this script prints cannot raise UnicodeEncodeError; characters that still
# cannot be encoded are replaced instead of aborting the run.
# reconfigure() changes the existing stream in place (keeping its buffering
# mode) rather than stacking a second text wrapper on the same byte buffer.
# The wrapper form is kept only as a fallback for stream objects that lack
# reconfigure() but do expose a byte buffer; anything else is left untouched.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Two directory levels above this file (presumably the repository root —
# confirm against the project layout).  It is placed first on sys.path so the
# ``scrapers`` package imported inside main() resolves from there.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
# Keys printed for each sampled deal, in display order.
_SAMPLE_KEYS = (
    "deal_type", "case_number", "auction_date", "starting_bid",
    "estimated_arv", "address", "city", "state", "zip", "county",
    "listing_price",
)

# Keys every scraped deal is expected to carry with a non-empty value.
_REQUIRED_FIELDS = (
    "source", "deal_type", "case_number", "auction_date",
    "county", "listing_price",
)


def _is_iso_date(value: object) -> bool:
    """Return True when *value* is a real calendar date written ``YYYY-MM-DD``."""
    import re
    from datetime import date

    # The explicit pattern pins the exact 10-character layout; fromisoformat()
    # then rejects impossible dates such as month 13 or day 45.
    if not isinstance(value, str):
        return False
    if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", value) is None:
        return False
    try:
        date.fromisoformat(value)
    except ValueError:
        return False
    return True


def _print_sample(deals: list[dict]) -> None:
    """Print the display keys and a truncated description of each given deal."""
    for position, deal in enumerate(deals, 1):
        print(f"\n Deal [{position}]")
        for key in _SAMPLE_KEYS:
            print(f" {key}: {deal.get(key)}")
        # ``or ""`` also covers a key that is present but None; dict.get's
        # default only applies when the key is missing entirely.
        description = (deal.get("listing_description") or "")[:120]
        print(f" description: {description}")


def _count_missing_fields(deals: list[dict]) -> int:
    """Print a warning per missing required field and return how many were found."""
    failures = 0
    for position, deal in enumerate(deals, 1):
        for field in _REQUIRED_FIELDS:
            value = deal.get(field)
            if value is not None and value != "":
                continue
            # Acceptable: the deal has a starting_bid but no listing_price.
            if field == "listing_price" and deal.get("starting_bid"):
                continue
            print(f" ⚠️ deal {position} ({deal.get('case_number')}): missing {field}")
            failures += 1
    return failures


def _field_types_ok(deals: list[dict]) -> bool:
    """Check starting_bid is numeric and auction_date is an ISO date; print each violation."""
    all_ok = True
    for deal in deals:
        bid = deal.get("starting_bid")
        if bid and not isinstance(bid, (int, float)):
            print(f" ⚠️ starting_bid not numeric in {deal.get('case_number')}: {bid!r}")
            all_ok = False
        auction_date = deal.get("auction_date")
        if auction_date and not _is_iso_date(auction_date):
            print(f" ⚠️ auction_date not ISO YYYY-MM-DD in {deal.get('case_number')}")
            all_ok = False
    return all_ok


def main() -> int:
    """Run the Miami-Dade Clerk scraper smoke test and report on its output.

    Scrapes three days ahead without persisting anything, prints the first
    five deals, then checks required fields on every deal and value types on
    the first five.

    Returns:
        1 when the scraper returned no deals, otherwise 0.  Structural and
        type problems are printed as warnings and do not change the result.
    """
    print("=" * 70)
    print("Miami-Dade Clerk scraper smoke test")
    print("=" * 70)

    def log(m: str) -> None:
        # Progress callback handed to the scraper.
        print(f" {m}")

    # ───────────────────────────────────────────────────────────────────────
    # STEP 1: scrape without persisting, just to see what data comes out
    # ───────────────────────────────────────────────────────────────────────
    # Imported here rather than at module top: the package is only
    # resolvable once the module-level sys.path setup has run.
    from scrapers.miami_dade_clerk import scrape_miami_dade_auctions

    print("\n--- PASO 1: scrape (no DB), 3 dias ahead ---")
    started = time.perf_counter()
    deals = scrape_miami_dade_auctions(days_ahead=3, status_cb=log)
    elapsed = time.perf_counter() - started
    print(f"\nResult: {len(deals)} deals in {elapsed:.1f}s")

    if not deals:
        print("❌ NO DEALS scraped. Aborting test.")
        return 1

    # Print the first 5 deals in detail.
    print()
    print("--- SAMPLE DEALS (first 5) ---")
    _print_sample(deals[:5])

    # ───────────────────────────────────────────────────────────────────────
    # STEP 2: structural validation
    # ───────────────────────────────────────────────────────────────────────
    print()
    print("--- PASO 2: estructura ---")
    failures = _count_missing_fields(deals)
    if failures == 0:
        print(f" ✅ All {len(deals)} deals have required fields")
    else:
        print(f" ⚠️ {failures} field-missing instances across deals")

    # Field types (only the first five deals are sampled).
    if _field_types_ok(deals[:5]):
        print(" ✅ Field types OK (numeric pricing, ISO dates)")

    return 0
|
|
|
|
|
|
# Script entry point: run the smoke test and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|