61 lines
2.8 KiB
Python
61 lines
2.8 KiB
Python
"""Probe tomorrow's auction to see real case listings + nail down the parse structure."""
|
|
from __future__ import annotations
|
|
import io, sys, time
|
|
# Force UTF-8 output with replacement of unencodable characters so that
# printing scraped page text never raises UnicodeEncodeError on consoles
# with a narrow default code page.  Reconfiguring the existing stream in
# place (instead of wrapping sys.stdout.buffer in a new TextIOWrapper)
# keeps its line-buffering behaviour, so progress lines still appear
# immediately on a terminal.  The guard skips streams that are not
# TextIOWrapper-like (e.g. a replaced stdout without reconfigure()).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# User-agent string of desktop Chrome 131 on 64-bit Windows 10; supplied as
# the `user_agent` of the browser context created further down.
REAL_UA = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/131.0.0.0",
    "Safari/537.36",
))
|
|
|
|
# Candidate auction dates (MM/DD/YYYY), tried in order until one of them
# turns out to list real cases.
# NOTE(review): 05/16/2026 appears to be a Saturday while 05/18/2026 (Monday)
# is absent — confirm this is intended.
DATES_TO_PROBE = [
    "05/14/2026",
    "05/15/2026",
    "05/16/2026",
    "05/19/2026",
    "05/20/2026",
    "05/21/2026",
]
|
|
|
|
# Walk DATES_TO_PROBE in order, load the auction preview page for each date in
# a headless Chromium, and stop at the first date whose page appears to list
# real cases.  For that date the rendered HTML is saved under scripts/ and the
# first rows of the first three tables are printed for manual inspection.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = context.new_page()
        page.set_default_timeout(20_000)

        for date in DATES_TO_PROBE:
            url = f"https://www.miamidade.realforeclose.com/index.cfm?zaction=AUCTION&zmethod=PREVIEW&AuctionDate={date}"
            print(f"\n=== {date} ===")
            try:
                response = page.goto(url, wait_until="networkidle", timeout=20_000)
                # Short settle delay after network idle.  wait_for_timeout is
                # used instead of time.sleep so Playwright keeps processing
                # browser events while waiting.
                page.wait_for_timeout(2_000)
                body = page.locator("body").inner_text()
                body_lower = body.lower()

                # Quick check for cases
                has_no_cases = (
                    "no cases currently being" in body_lower
                    or "no auction" in body_lower
                )

                # Count "Case #" markers (case-insensitive) as a proxy for the
                # number of cases.  Each marker is counted exactly once: a
                # "Case #:" occurrence is also a "Case #" occurrence, so adding
                # both counts would double the result.
                case_count = body_lower.count("case #")
                has_case_number = case_count > 0

                # goto() may return None (e.g. same-URL navigation); do not let
                # that abort the probe of an otherwise loaded page.
                status = response.status if response is not None else "n/a"
                print(f" status={status} | has_no_cases_text={has_no_cases} | Case # markers found: {case_count}")

                # More than one marker is treated as real listings even if a
                # "no cases"/"no auction" phrase also appears on the page; a
                # single marker only counts when no such phrase is present.
                if case_count > 1 or (has_case_number and not has_no_cases):
                    # Save this HTML for detailed inspection
                    out_path = f"scripts/_mdc_auction_{date.replace('/', '-')}.html"
                    with open(out_path, "w", encoding="utf-8") as f:
                        f.write(page.content())
                    print(f" → SAVED: {out_path}")

                    # Print first few rows of the case tables
                    tables = page.locator("table").all()
                    print(f" Tables: {len(tables)}")
                    for ti, t in enumerate(tables[:3]):
                        rows = t.locator("tr").all()
                        print(f" Table [{ti}] rows={len(rows)}")
                        for ri, r in enumerate(rows[:20]):
                            # Cell texts are trimmed and capped at 50 chars to
                            # keep the console dump readable.
                            cells = [
                                (c.text_content() or "").strip()[:50]
                                for c in r.locator("td, th").all()
                            ]
                            non_empty = [c for c in cells if c]
                            if non_empty:
                                print(f" Row {ri}: {non_empty}")
                    break  # found a date with cases
            except Exception as e:
                # Best-effort probe: report the failure and try the next date.
                print(f" ERROR: {e}")
    finally:
        # Always release the browser, even if setup or the loop fails
        # outside the per-date error handler.
        browser.close()
|