"""Probe tomorrow's auction to see real case listings + nail down the parse structure.""" from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") # Probe multiple dates to find one with real cases DATES_TO_PROBE = [ "05/14/2026", "05/15/2026", "05/16/2026", "05/19/2026", "05/20/2026", "05/21/2026", ] with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( user_agent=REAL_UA, viewport={"width": 1280, "height": 800}, locale="en-US", timezone_id="America/New_York", ) page = context.new_page() page.set_default_timeout(20_000) for date in DATES_TO_PROBE: url = f"https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&zmethod=PREVIEW&AuctionDate={date}" print(f"\n=== {date} ===") try: response = page.goto(url, wait_until="networkidle", timeout=20_000) time.sleep(2) body = page.locator("body").inner_text() # Quick check for cases has_no_cases = "no cases currently being" in body.lower() or "no auction" in body.lower() has_case_number = "case #" in body.lower() or "Case #:" in body # Count Case # occurrences as proxy for # of cases case_count = body.count("Case #:") + body.count("Case #") print(f" status={response.status} | has_no_cases_text={has_no_cases} | Case # markers found: {case_count}") if case_count > 1 or (has_case_number and not has_no_cases): # Save this HTML for detailed inspection with open(f"scripts/_mdc_auction_{date.replace('/', '-')}.html", "w", encoding="utf-8") as f: f.write(page.content()) print(f" → SAVED: scripts/_mdc_auction_{date.replace('/', '-')}.html") # Print first few rows of the case tables tables = page.locator("table").all() print(f" Tables: {len(tables)}") for ti, t in enumerate(tables[:3]): rows = t.locator("tr").all() print(f" Table [{ti}] rows={len(rows)}") for ri, r in enumerate(rows[:20]): cells = [(c.text_content() or "").strip()[:50] for c in r.locator("td, th").all()] non_empty = [c for c in cells if c] if non_empty: print(f" Row {ri}: {non_empty}") break # found a date with cases except Exception as e: print(f" ERROR: {e}") browser.close()