60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
"""HUD Homestore scraper smoke test (FL state only)."""
|
|
from __future__ import annotations
|
|
import io, sys, time
|
|
from pathlib import Path
|
|
|
|
# Force UTF-8 output (with replacement for unencodable characters) so the
# emoji printed by this script cannot raise UnicodeEncodeError on consoles
# whose default encoding is narrower.  reconfigure() changes the existing
# stream in place, which keeps its line-buffering behaviour; the guard skips
# the step when stdout has been replaced by an object without that method
# (or is None) instead of crashing at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# Directory two levels above this file.
ROOT = Path(__file__).resolve().parent.parent

# Put ROOT first on the import path; main() imports `scrapers.hud_homestore`,
# which presumably lives under ROOT -- confirm against the repository layout.
sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
def main() -> int:
    """Scrape HUD Homestore for FL only and print a human-readable report.

    The report has three parts: the deal count with elapsed time, the first
    five deals with a fixed set of fields, and a check that every deal has a
    truthy value for each required field.

    Returns:
        1 if the scraper returned no deals, otherwise 0.  Missing required
        fields are printed as warnings and do not change the return value.
    """
    banner = "=" * 70
    print(banner)
    print("HUD Homestore scraper smoke test (FL)")
    print(banner)

    # Imported inside the function, after the banner; presumably so that the
    # module-level sys.path setup has already run -- confirm.
    from scrapers.hud_homestore import scrape_hud_homestore

    def log(m: str) -> None:
        # Status callback handed to the scraper: echo each message.
        print(f" {m}")

    started = time.perf_counter()
    deals = scrape_hud_homestore(states=["FL"], status_cb=log)
    elapsed = time.perf_counter() - started
    print(f"\nResult: {len(deals)} deals in {elapsed:.1f}s")

    if not deals:
        print("❌ NO DEALS scraped. Aborting.")
        return 1

    sample_fields = (
        "deal_type", "auction_date", "address", "city", "county",
        "state", "zip", "listing_price", "beds", "baths",
    )
    print()
    print("--- SAMPLE DEALS (first 5) ---")
    for number, deal in enumerate(deals[:5], 1):
        print(f"\n [{number}] {deal.get('case_number')}")
        for field in sample_fields:
            print(f" {field}: {deal.get(field)}")
        description = deal.get('listing_description') or ''
        print(f" desc: {description[:120]}")

    # Validate required fields
    print()
    print("--- VALIDATION ---")
    required_fields = ("source", "deal_type", "case_number", "listing_price", "address")
    missing_count = 0
    for number, deal in enumerate(deals, start=1):
        for field in required_fields:
            if deal.get(field):
                continue
            missing_count += 1
            # Only the first five problems are itemised; the rest are counted.
            if missing_count > 5:
                continue
            print(f" ⚠️ deal {number} ({deal.get('case_number')}): missing {field}")

    if missing_count:
        print(f" ⚠️ {missing_count} field-missing instances")
    else:
        print(f" ✅ All {len(deals)} deals have required fields")

    return 0
|
|
|
|
|
|
# Script entry point: run the smoke test and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|