feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
"""E2E test of Zillow scraper using CACHED markdown (0 Firecrawl credits)."""
|
||||
from __future__ import annotations
|
||||
import io, sys
|
||||
from pathlib import Path
|
||||
|
||||
# Make stdout emit UTF-8 (unencodable characters are replaced) so the
# non-ASCII status output printed by this script does not raise on consoles
# with a narrower default encoding. reconfigure() adjusts the existing stream
# in place instead of stacking a second TextIOWrapper on the same buffer, and
# the hasattr guard skips the step when stdout has been replaced by an object
# that does not support it.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

# ROOT is the directory one level above the one containing this file. It is
# placed at the front of sys.path so the imports that follow can resolve
# from there.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
|
||||
|
||||
import data_fetchers # noqa: F401 — load .env
|
||||
|
||||
import scrapers.zillow as zillow_mod
|
||||
import scrapers._cache as cache_mod
|
||||
from scrapers.zillow import run_scraper_to_db
|
||||
from deals_db import list_deals, init_db
|
||||
|
||||
|
||||
def main() -> int:
    """Run the Zillow scraper end to end against a pre-seeded cache entry.

    Steps:
      1. Initialise the deals database via ``init_db()``.
      2. Save the markdown stored in ``scripts/_zillow_miami_md.txt`` into the
         scraper cache under the URL built for Miami-Dade, FL, page 1.
      3. Call ``run_scraper_to_db`` for that county with
         ``auto_classify=True`` and print every key/value of its result.
      4. Read back up to 20 ``zillow`` rows with ``list_deals`` and print the
         first five.

    Returns:
        0 on completion, 1 when the markdown fixture file does not exist.
    """
    init_db()

    # Pre-populate cache with the test markdown from prior exploration
    test_url = zillow_mod._build_zillow_url("Miami-Dade", "FL", 1)
    print(f"Cache target URL: {test_url}")

    md_file = ROOT / "scripts" / "_zillow_miami_md.txt"
    if not md_file.exists():
        print(f"❌ Test markdown not found at {md_file}")
        return 1

    md = md_file.read_text(encoding="utf-8")
    cache_mod.save_cache(
        "zillow", test_url, md,
        status_code=200, ttl_seconds=cache_mod.DEFAULT_TTL_SECONDS_HOURLY,
    )
    print(f"Cached: {len(md):,} chars")

    # Run pipeline (cache hit, 0 credits, auto_classify=True to test full flow)
    print()
    print("Running zillow.run_scraper_to_db (auto_classify=True — ~5s/deal LLM)...")
    result = run_scraper_to_db(
        counties=["Miami-Dade"], state="FL", pages_per_county=1,
        auto_classify=True,
        status_cb=lambda m: print(f" {m}"),
    )
    print()
    print("Result:")
    for k, v in result.items():
        print(f" {k}: {v}")

    # Verify in DB
    print()
    print("=== zillow source in deals.db ===")
    zd = list_deals(source="zillow", limit=20)
    print(f"Total zillow deals: {len(zd)}")
    for d in zd[:5]:
        addr = (d.get("address") or "?")[:55]
        price = d.get("listing_price") or 0
        beds = d.get("beds")
        baths = d.get("baths")
        sqft = d.get("sqft")
        # A missing case_number comes back as None, and None rejects an
        # alignment spec such as "<10" with TypeError. Convert with !s first,
        # exactly as the beds/baths/sqft fields already do.
        zpid = d.get("case_number")
        print(f" zpid {zpid!s:<10} | ${price:>11,.0f} | {beds!s:>2}bd/{baths!s:>3}ba/{sqft!s:>5}sqft | {addr}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: main()'s integer result becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user