"""Explore Realforeclose with real Chrome UA + multiple entry paths.""" from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright # Use real Chrome UA to bypass 403 anti-bot REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") URLS_TO_PROBE = [ "https://www.miamidade.realforeclose.com/", "https://www.miamidade.realforeclose.com/index.cfm", "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=PREVIEW", "https://www.miamidade.realforeclose.com/index.cfm?zaction=USER&Zmethod=CALENDAR", "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=DISPLAY", ] with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( user_agent=REAL_UA, viewport={"width": 1280, "height": 800}, locale="en-US", timezone_id="America/New_York", extra_http_headers={ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", "Accept-Encoding": "gzip, deflate, br", "Sec-Fetch-Site": "none", "Sec-Fetch-Mode": "navigate", "Sec-Fetch-User": "?1", "Sec-Fetch-Dest": "document", "Upgrade-Insecure-Requests": "1", }, ) page = context.new_page() page.set_default_timeout(20_000) for url in URLS_TO_PROBE: print(f"\n=== Probing {url} ===") try: response = page.goto(url, wait_until="networkidle", timeout=25_000) status = response.status if response else "?" title = page.title() print(f" status={status}, final_url={page.url}, title={title}") content = page.content() print(f" html_len={len(content)}") if 200 <= status < 400 and len(content) > 500: print(" → SUCCESS — page loaded with substantial content") # Show first 500 chars of visible text body = page.locator("body").inner_text()[:500] print(f" body_text_preview: {body[:500]!r}") # Save HTML for inspection slug = url.split("=")[-1] or "landing" with open(f"scripts/_miamidade_{slug}.html", "w", encoding="utf-8") as f: f.write(content) print(f" HTML saved: scripts/_miamidade_{slug}.html") break except Exception as e: print(f" ERROR: {e}") browser.close()