"""Explore Realforeclose with a real Chrome UA and multiple entry paths."""

from __future__ import annotations

import io, sys, time
from pathlib import Path

# Re-wrap stdout as UTF-8 with errors="replace" so that printing non-ASCII
# text cannot raise UnicodeEncodeError.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

from playwright.sync_api import sync_playwright

# Use a real Chrome UA to bypass the 403 anti-bot response.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/131.0.0.0 Safari/537.36"
)

# Entry points to try, in this order.
URLS_TO_PROBE = [
    "https://www.miamidade.realforeclose.com/",
    "https://www.miamidade.realforeclose.com/index.cfm",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=PREVIEW",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=USER&Zmethod=CALENDAR",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=DISPLAY",
]

def _slug_for(url: str) -> str:
    """Return a file-name-safe label for *url*.

    The label is the text after the last ``=`` in the URL (for the probed
    query URLs this is the ``Zmethod`` value).  URLs that contain no ``=``,
    or end with one, are labelled ``"landing"``.  Any character that is not
    alphanumeric or one of ``-_.`` is replaced by ``_`` so the label can be
    embedded in a file name.
    """
    _, separator, tail = url.rpartition("=")
    if not separator or not tail:
        return "landing"
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in tail)


# Probe each entry URL in turn with a browser context that uses the real
# Chrome UA and navigation-style headers; stop at the first URL that returns
# a 2xx/3xx status with more than 500 characters of HTML, after saving that
# HTML under scripts/ for inspection.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
            extra_http_headers={
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-User": "?1",
                "Sec-Fetch-Dest": "document",
                "Upgrade-Insecure-Requests": "1",
            },
        )
        page = context.new_page()
        page.set_default_timeout(20_000)

        for url in URLS_TO_PROBE:
            print(f"\n=== Probing {url} ===")
            try:
                response = page.goto(url, wait_until="networkidle", timeout=25_000)
                # goto() may yield no response object; keep the status as
                # None in that case so it is never compared with integers.
                status = response.status if response is not None else None
                status_label = "?" if status is None else status
                title = page.title()
                print(f" status={status_label}, final_url={page.url}, title={title}")
                content = page.content()
                print(f" html_len={len(content)}")

                loaded = status is not None and 200 <= status < 400
                if not loaded or len(content) <= 500:
                    continue

                print(" → SUCCESS — page loaded with substantial content")
                # Show first 500 chars of visible text
                body = page.locator("body").inner_text()[:500]
                print(f" body_text_preview: {body!r}")

                # Save HTML for inspection
                out_path = Path("scripts") / f"_miamidade_{_slug_for(url)}.html"
                out_path.parent.mkdir(parents=True, exist_ok=True)
                out_path.write_text(content, encoding="utf-8")
                print(f" HTML saved: {out_path.as_posix()}")
                break
            except Exception as e:
                # Best-effort probing: report the failure and try the next URL.
                print(f" ERROR: {e}")
    finally:
        # Close the browser even if setup or the loop raised unexpectedly.
        browser.close()