feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+67
View File
@@ -0,0 +1,67 @@
"""E2E test of Zillow scraper using CACHED markdown (0 Firecrawl credits)."""
from __future__ import annotations
import io, sys
from pathlib import Path
# Re-wrap stdout's underlying byte buffer so everything printed below is
# encoded as UTF-8, with errors="replace" instead of the default strict mode
# (this script prints non-ASCII glyphs such as the cross mark and em dash).
# NOTE(review): presumably aimed at consoles whose default encoding is not
# UTF-8 — confirm.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# ROOT is the directory one level above the folder that contains this file.
ROOT = Path(__file__).resolve().parent.parent
# Put ROOT first on the import path; this has to run before the project
# imports that follow.
# NOTE(review): assumes data_fetchers, scrapers and deals_db live directly
# under ROOT — confirm.
sys.path.insert(0, str(ROOT))
import data_fetchers # noqa: F401 — load .env
import scrapers.zillow as zillow_mod
import scrapers._cache as cache_mod
from scrapers.zillow import run_scraper_to_db
from deals_db import list_deals, init_db
def main() -> int:
    """Run the Zillow scraper end-to-end against a pre-seeded cache entry.

    Steps:
      1. Initialise the deals database via ``init_db()``.
      2. Read the saved markdown fixture at
         ``ROOT / "scripts" / "_zillow_miami_md.txt"`` and store it in the
         scraper cache under the URL built for Miami-Dade, FL, page 1, so the
         scraper run is served from cache.
      3. Call ``run_scraper_to_db`` for that single county/page with
         ``auto_classify=True`` and print every key/value of its result.
      4. List up to 20 deals with ``source="zillow"`` and print a one-line
         summary for the first five.

    Returns:
        ``1`` when the markdown fixture file is missing, ``0`` otherwise.
    """
    init_db()

    # Pre-populate cache with the test markdown from prior exploration
    test_url = zillow_mod._build_zillow_url("Miami-Dade", "FL", 1)
    print(f"Cache target URL: {test_url}")

    md_file = ROOT / "scripts" / "_zillow_miami_md.txt"
    if not md_file.exists():
        print(f"❌ Test markdown not found at {md_file}")
        return 1

    md = md_file.read_text(encoding="utf-8")
    cache_mod.save_cache(
        "zillow", test_url, md,
        status_code=200, ttl_seconds=cache_mod.DEFAULT_TTL_SECONDS_HOURLY,
    )
    print(f"Cached: {len(md):,} chars")

    # Run pipeline (cache hit, 0 credits, auto_classify=True to test full flow)
    print()
    print("Running zillow.run_scraper_to_db (auto_classify=True — ~5s/deal LLM)...")
    result = run_scraper_to_db(
        counties=["Miami-Dade"], state="FL", pages_per_county=1,
        auto_classify=True,
        status_cb=lambda m: print(f"  {m}"),
    )

    print()
    print("Result:")
    for k, v in result.items():
        print(f"  {k}: {v}")

    # Verify in DB
    print()
    print("=== zillow source in deals.db ===")
    zd = list_deals(source="zillow", limit=20)
    print(f"Total zillow deals: {len(zd)}")
    for d in zd[:5]:
        addr = (d.get("address") or "?")[:55]
        price = d.get("listing_price") or 0
        beds = d.get("beds")
        baths = d.get("baths")
        sqft = d.get("sqft")
        # `!s` on case_number: a missing value is None, and None rejects a
        # non-empty format spec such as `<10` with TypeError. Converting to
        # str first keeps the column aligned and is identical for str/int.
        print(f"  zpid {d.get('case_number')!s:<10} | ${price:>11,.0f} | {beds!s:>2}bd/{baths!s:>3}ba/{sqft!s:>5}sqft | {addr}")
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())