feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
"""Explore Realforeclose with real Chrome UA + multiple entry paths."""
|
||||
from __future__ import annotations
|
||||
import io, sys, time
|
||||
# Force UTF-8 on stdout so the non-ASCII characters this script prints
# ("→", "—", arbitrary page titles) are encoded as UTF-8, with anything
# unencodable replaced rather than raising UnicodeEncodeError.
# reconfigure() changes the encoding in place and keeps the stream's existing
# line buffering; wrapping sys.stdout.buffer in a fresh TextIOWrapper would
# leave progress output sitting in the buffer during long navigations.
# A replaced stdout without reconfigure() is left untouched instead of crashing.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# User-Agent string of a desktop Chrome 131 on Windows 10 (x64).
# Sent instead of the automation default to get past the site's 403 anti-bot
# response.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
|
||||
|
||||
# Candidate entry points, tried in this order; probing stops at the first one
# that loads successfully.
URLS_TO_PROBE = [
    "https://www.miamidade.realforeclose.com/",
    "https://www.miamidade.realforeclose.com/index.cfm",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=PREVIEW",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=USER&Zmethod=CALENDAR",
    "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=DISPLAY",
]
|
||||
|
||||
def _slug_for_url(url: str) -> str:
    """Return a filesystem-safe label for *url*, used in the saved file name.

    A URL with a query string yields the value of its last parameter
    (``...&Zmethod=PREVIEW`` -> ``"PREVIEW"``). A URL without a query string,
    or whose last value is empty, yields ``"landing"``. Any character other
    than letters, digits, ``-`` and ``_`` is replaced by ``_`` so the result
    can never introduce path separators.
    """
    _, _, query = url.partition("?")
    raw = query.rpartition("=")[2] if "=" in query else ""
    safe = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in raw)
    return safe or "landing"


# Probe each candidate URL with a browser context dressed up as a regular
# desktop Chrome, and stop at the first URL that loads with real content.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
            # Headers a browser sends for a user-initiated top-level
            # navigation. Playwright attaches them to every request made
            # from this context.
            extra_http_headers={
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-User": "?1",
                "Sec-Fetch-Dest": "document",
                "Upgrade-Insecure-Requests": "1",
            },
        )
        page = context.new_page()
        page.set_default_timeout(20_000)

        for url in URLS_TO_PROBE:
            print(f"\n=== Probing {url} ===")
            # Best-effort probe: any failure on one URL is reported and the
            # next candidate is tried.
            try:
                response = page.goto(url, wait_until="networkidle", timeout=25_000)
                # goto() may return None (no main-resource response); keep
                # the "?" placeholder for display only and never compare it
                # numerically.
                status = response.status if response is not None else "?"
                title = page.title()
                print(f" status={status}, final_url={page.url}, title={title}")
                content = page.content()
                print(f" html_len={len(content)}")

                loaded = response is not None and 200 <= response.status < 400
                if not loaded or len(content) <= 500:
                    continue

                print(" → SUCCESS — page loaded with substantial content")
                # Show first 500 chars of visible text
                body = page.locator("body").inner_text()[:500]
                print(f" body_text_preview: {body!r}")
                # Save HTML for inspection
                slug = _slug_for_url(url)
                out_path = f"scripts/_miamidade_{slug}.html"
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(content)
                print(f" HTML saved: {out_path}")
                break
            except Exception as e:
                print(f" ERROR: {e}")
    finally:
        # Close the browser even if the loop is interrupted.
        browser.close()
|
||||
Reference in New Issue
Block a user