# File: AR-House/scripts/explore_miamidade_realforeclose.py
# Snapshot: 2026-07-03 12:24:58 -04:00 (65 lines, 2.7 KiB, Python)

"""Explore Realforeclose with real Chrome UA + multiple entry paths."""
from __future__ import annotations
import io, sys, time
# Re-wrap stdout as UTF-8 with errors="replace" so that printing characters the
# console encoding cannot represent degrades to a replacement character instead
# of raising UnicodeEncodeError.
# NOTE(review): presumably needed for Windows consoles (cp1252 etc.) — confirm.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
from playwright.sync_api import sync_playwright
# User-Agent string of a desktop Chrome build, sent instead of the automation
# default so the site's anti-bot layer does not answer with HTTP 403.
REAL_UA = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/131.0.0.0",
    "Safari/537.36",
))
# Candidate entry points, tried in order: the bare landing page first, then
# index.cfm with and without the zaction/Zmethod query combinations.
URLS_TO_PROBE = [
    "https://www.miamidade.realforeclose.com/" + suffix
    for suffix in (
        "",
        "index.cfm",
        "index.cfm?zaction=AUCTION&Zmethod=PREVIEW",
        "index.cfm?zaction=USER&Zmethod=CALENDAR",
        "index.cfm?zaction=AUCTION&Zmethod=DISPLAY",
    )
]
def _slug_for(url):
    """Return a filesystem-safe tag for *url*, used in the saved HTML filename.

    The tag is the text after the last ``=`` in the URL (for example
    ``PREVIEW``). URLs that contain no ``=`` map to ``"landing"``.
    """
    _, sep, tail = url.rpartition("=")
    # With no "=" present, split("=")[-1] would hand back the entire URL, and
    # its ":" and "/" characters make the resulting path impossible to open.
    if not sep:
        return "landing"
    safe = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in tail)
    return safe or "landing"


# Probe each candidate URL in a headless Chromium that presents itself as a
# regular desktop Chrome. Stop at the first URL that answers with a 2xx/3xx
# status and more than 500 characters of HTML, saving that HTML for inspection.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
            # Headers a real browser sends on a top-level navigation.
            extra_http_headers={
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-User": "?1",
                "Sec-Fetch-Dest": "document",
                "Upgrade-Insecure-Requests": "1",
            },
        )
        page = context.new_page()
        page.set_default_timeout(20_000)
        for url in URLS_TO_PROBE:
            print(f"\n=== Probing {url} ===")
            # Best-effort probing: a failure on one URL is reported and the
            # next candidate is tried, so the broad catch is deliberate.
            try:
                response = page.goto(url, wait_until="networkidle", timeout=25_000)
                # goto() may return no response object; keep the status as
                # None in that case so the numeric range check below never
                # compares an int against the "?" placeholder.
                status = response.status if response else None
                status_shown = status if status is not None else "?"
                title = page.title()
                print(f" status={status_shown}, final_url={page.url}, title={title}")
                content = page.content()
                print(f" html_len={len(content)}")
                loaded_ok = status is not None and 200 <= status < 400
                if loaded_ok and len(content) > 500:
                    print(" → SUCCESS — page loaded with substantial content")
                    # Show first 500 chars of visible text
                    body = page.locator("body").inner_text()[:500]
                    print(f" body_text_preview: {body!r}")
                    # Save HTML for inspection (path is relative to the
                    # current working directory).
                    out_path = f"scripts/_miamidade_{_slug_for(url)}.html"
                    with open(out_path, "w", encoding="utf-8") as f:
                        f.write(content)
                    print(f" HTML saved: {out_path}")
                    break
            except Exception as e:
                print(f" ERROR: {e}")
    finally:
        # Close the browser even if context/page setup fails.
        browser.close()