Files
AR-House/scripts/explore_miamidade_auction_calendar.py
2026-07-03 12:24:58 -04:00

95 lines
3.7 KiB
Python

"""Deep-dive on the Miami-Dade Realforeclose auction calendar page."""
from __future__ import annotations
import io, sys, time
# Force UTF-8 output with replacement of unencodable characters, so that the
# scraped page text and the "→" marker printed below cannot raise
# UnicodeEncodeError on a console with a narrower default encoding.
# reconfigure() changes the existing stream in place, which keeps its
# line-buffering behaviour (progress messages still appear immediately on a
# terminal) instead of stacking a second text wrapper on the same buffer.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    # stdout was replaced by an object without reconfigure(); fall back to
    # wrapping its underlying binary buffer.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
from playwright.sync_api import sync_playwright
# User-Agent string handed to the browser context (a desktop Chrome 131 on
# Windows 10 identifier).
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/131.0.0.0 Safari/537.36"
)

# Miami-Dade Realforeclose page requested by this script (AUCTION / PREVIEW).
PREVIEW_URL = (
    "https://www.miamidade.realforeclose.com/index.cfm"
    "?zaction=AUCTION&Zmethod=PREVIEW"
)
# Exploration run: render the PREVIEW page in headless Chromium, then dump its
# tables, candidate container <div>s, relevant links, the rendered HTML and
# keyword-anchored body text so the page structure can be studied by hand.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = context.new_page()
        page.set_default_timeout(20_000)

        print("Loading PREVIEW page...")
        response = page.goto(PREVIEW_URL, wait_until="networkidle", timeout=25_000)
        # Playwright documents goto() as returning None for some navigations,
        # so do not assume a Response object is available.
        status = response.status if response is not None else "n/a"
        print(f"Status: {status}, URL: {page.url}")
        print(f"Title: {page.title()}")

        # Wait extra for any JS rendering. Playwright's sync API docs advise
        # page.wait_for_timeout() over time.sleep(), which blocks the thread
        # Playwright uses to process browser events.
        page.wait_for_timeout(3_000)

        # Inspect what's on the page after JS rendering
        print()
        print("--- ALL TABLES ---")
        tables = page.locator("table").all()
        print(f"Found {len(tables)} tables")
        # Only the first 8 tables and the first 4 rows of each are shown;
        # cell text is truncated to 40 characters.
        for i, table in enumerate(tables[:8]):
            try:
                rows = table.locator("tr").all()
                print(f"\n Table [{i}]: {len(rows)} rows")
                for j, row in enumerate(rows[:4]):
                    cells = [
                        (cell.text_content() or "").strip()[:40]
                        for cell in row.locator("td, th").all()
                    ]
                    print(f" Row {j}: {cells}")
            except Exception as exc:
                # Best-effort dump: report the failure and move to the next table.
                print(f" Table [{i}] error: {exc}")

        print()
        print("--- DIVs with id or class containing 'auction' / 'sale' / 'calendar' ---")
        selectors_to_probe = [
            "div[id*='auction']", "div[id*='sale']", "div[id*='calendar']",
            "div[class*='auction']", "div[class*='sale']", "div[class*='calendar']",
            "div[class*='content']", "div.AUCTION_DETAILS_DIV",
        ]
        for sel in selectors_to_probe:
            try:
                els = page.locator(sel).all()
                if els:
                    print(f" {sel}: {len(els)} elements")
                # Show up to three non-empty text samples (200 chars each).
                for el in els[:3]:
                    text = (el.text_content() or "").strip()[:200]
                    if text:
                        print(f" → {text!r}")
            except Exception as exc:
                # Still best-effort, but say so instead of hiding the failure.
                print(f" {sel}: error: {exc}")

        print()
        print("--- All links with 'auction', 'sale', 'case' in href or text ---")
        href_keywords = ("auction", "sale", "case")
        text_keywords = ("auction", "calendar", "sale", "scheduled")
        # Only the first 80 anchors on the page are examined.
        for link in page.locator("a").all()[:80]:
            try:
                href = link.get_attribute("href") or ""
                text = (link.text_content() or "").strip()[:80]
                href_lower = href.lower()
                text_lower = text.lower()
                is_relevant = (
                    any(kw in href_lower for kw in href_keywords)
                    or any(kw in text_lower for kw in text_keywords)
                )
                # Skip script pseudo-links and in-page fragment anchors.
                if is_relevant and not href.startswith(("javascript:", "#")):
                    print(f" href={href} | text='{text}'")
            except Exception as exc:
                # Still best-effort, but say so instead of hiding the failure.
                print(f" link skipped: {exc}")

        # Save the rendered HTML. The path is relative to the current working
        # directory, so a "scripts" directory must exist there.
        html = page.content()
        out_path = "scripts/_miamidade_preview_rendered.html"
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(html)
        print(f"\nRendered HTML saved: {out_path} ({len(html):,} chars)")

        # Show key body text snippets
        print()
        print("--- BODY TEXT SNIPPETS (around 'auction' or 'calendar' keywords) ---")
        body_text = page.locator("body").inner_text()
        # NOTE(review): `re` is not used by the code visible here; the import is
        # kept so the module namespace is unchanged.
        import re
        for kw in ["Auction Calendar", "Today's Auctions", "Upcoming Auctions", "View Auction", "Scheduled Sales", "Number of Auctions"]:
            # Case-sensitive search; only the first occurrence is shown, with
            # 50 characters of leading and 300 characters of trailing context.
            idx = body_text.find(kw)
            if idx >= 0:
                print(f" '{kw}' at pos {idx}: ...{body_text[max(0,idx-50):idx+300]!r}")
    finally:
        # Close the browser even when a step above raised.
        browser.close()