"""Deep-dive on the Miami-Dade Realforeclose auction calendar page.""" from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") PREVIEW_URL = "https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&Zmethod=PREVIEW" with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( user_agent=REAL_UA, viewport={"width": 1280, "height": 800}, locale="en-US", timezone_id="America/New_York", ) page = context.new_page() page.set_default_timeout(20_000) print("Loading PREVIEW page...") response = page.goto(PREVIEW_URL, wait_until="networkidle", timeout=25_000) print(f"Status: {response.status}, URL: {page.url}") print(f"Title: {page.title()}") # Wait extra for any JS rendering time.sleep(3) # Inspect what's on the page after JS rendering print() print("--- ALL TABLES ---") tables = page.locator("table").all() print(f"Found {len(tables)} tables") for i, t in enumerate(tables[:8]): try: rows = t.locator("tr").all() print(f"\n Table [{i}]: {len(rows)} rows") for j, r in enumerate(rows[:4]): cells = [(c.text_content() or "").strip()[:40] for c in r.locator("td, th").all()] print(f" Row {j}: {cells}") except Exception as e: print(f" Table [{i}] error: {e}") print() print("--- DIVs with id or class containing 'auction' / 'sale' / 'calendar' ---") selectors_to_probe = [ "div[id*='auction']", "div[id*='sale']", "div[id*='calendar']", "div[class*='auction']", "div[class*='sale']", "div[class*='calendar']", "div[class*='content']", "div.AUCTION_DETAILS_DIV", ] for sel in selectors_to_probe: try: els = page.locator(sel).all() if els: print(f" {sel}: {len(els)} elements") for e in els[:3]: text = (e.text_content() or "").strip()[:200] if text: print(f" → {text!r}") except Exception: pass print() print("--- All links with 'auction', 'sale', 'case' in href or text ---") for link in page.locator("a").all()[:80]: try: href = link.get_attribute("href") or "" text = (link.text_content() or "").strip()[:80] if any(kw in href.lower() for kw in ["auction", "sale", "case"]) or any(kw in text.lower() for kw in ["auction", "calendar", "sale", "scheduled"]): if not href.startswith("javascript:") and not href.startswith("#"): print(f" href={href} | text='{text}'") except Exception: pass # Save the rendered HTML html = page.content() with open("scripts/_miamidade_preview_rendered.html", "w", encoding="utf-8") as f: f.write(html) print(f"\nRendered HTML saved: scripts/_miamidade_preview_rendered.html ({len(html):,} chars)") # Show key body text snippets print() print("--- BODY TEXT SNIPPETS (around 'auction' or 'calendar' keywords) ---") body_text = page.locator("body").inner_text() import re for kw in ["Auction Calendar", "Today's Auctions", "Upcoming Auctions", "View Auction", "Scheduled Sales", "Number of Auctions"]: idx = body_text.find(kw) if idx >= 0: print(f" '{kw}' at pos {idx}: ...{body_text[max(0,idx-50):idx+300]!r}") browser.close()