95 lines
3.7 KiB
Python
95 lines
3.7 KiB
Python
"""Deep-dive on the Miami-Dade Realforeclose auction calendar page."""
|
|
from __future__ import annotations

import io
import re
import sys
import time
from pathlib import Path
|
|
# Force UTF-8 output with replacement of unencodable characters so that
# printing scraped page text (which may contain arbitrary Unicode, e.g. the
# "→" marker used below) cannot raise UnicodeEncodeError on consoles whose
# default code page is narrower. reconfigure() changes the existing stream in
# place, so its line-buffering setting is kept and progress messages still
# appear promptly; wrapping sys.stdout.buffer in a new TextIOWrapper would
# fall back to block buffering.
sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# Desktop Chrome user-agent string handed to the browser context, written one
# product token per line.
REAL_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/131.0.0.0 "
    "Safari/537.36"
)

# Address of the auction PREVIEW page that this script loads and inspects.
PREVIEW_URL = (
    "https://www.miamidade.realforeclose.com/index.cfm"
    "?zaction=AUCTION&Zmethod=PREVIEW"
)
|
|
|
|
# Render the PREVIEW page in headless Chromium and print a structural survey of
# the resulting DOM: the first tables and their leading rows, divs whose
# id/class mention auction/sale/calendar, keyword-matching links, and body-text
# snippets around a few fixed phrases. The rendered HTML is also written to
# disk so it can be inspected offline.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context(
            user_agent=REAL_UA,
            viewport={"width": 1280, "height": 800},
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = context.new_page()
        page.set_default_timeout(20_000)

        print("Loading PREVIEW page...")
        response = page.goto(PREVIEW_URL, wait_until="networkidle", timeout=25_000)
        # goto() is typed as Optional[Response]; guard so a missing response
        # does not abort the run before any diagnostics are printed.
        status = response.status if response is not None else "n/a"
        print(f"Status: {status}, URL: {page.url}")
        print(f"Title: {page.title()}")

        # Wait extra for any JS rendering. Use Playwright's own timer rather
        # than time.sleep(), which Playwright advises against in the sync API.
        page.wait_for_timeout(3_000)

        # Inspect what's on the page after JS rendering
        print()
        print("--- ALL TABLES ---")
        tables = page.locator("table").all()
        print(f"Found {len(tables)} tables")
        # Only the first 8 tables and the first 4 rows of each are shown; cell
        # text is clipped to 40 characters to keep the dump readable.
        for i, t in enumerate(tables[:8]):
            try:
                rows = t.locator("tr").all()
                print(f"\n Table [{i}]: {len(rows)} rows")
                for j, r in enumerate(rows[:4]):
                    cells = [
                        (c.text_content() or "").strip()[:40]
                        for c in r.locator("td, th").all()
                    ]
                    print(f" Row {j}: {cells}")
            except Exception as e:
                print(f" Table [{i}] error: {e}")

        print()
        print("--- DIVs with id or class containing 'auction' / 'sale' / 'calendar' ---")
        selectors_to_probe = [
            "div[id*='auction']", "div[id*='sale']", "div[id*='calendar']",
            "div[class*='auction']", "div[class*='sale']", "div[class*='calendar']",
            "div[class*='content']", "div.AUCTION_DETAILS_DIV",
        ]
        for sel in selectors_to_probe:
            try:
                els = page.locator(sel).all()
                if not els:
                    continue
                print(f" {sel}: {len(els)} elements")
                # Preview the text of at most three matches per selector.
                for el in els[:3]:
                    text = (el.text_content() or "").strip()[:200]
                    if text:
                        print(f" → {text!r}")
            except Exception as exc:
                # Best-effort probe: report the failure and keep going, the
                # same way the table loop does, instead of hiding it.
                print(f" {sel} error: {exc}")

        print()
        print("--- All links with 'auction', 'sale', 'case' in href or text ---")
        href_keywords = ("auction", "sale", "case")
        text_keywords = ("auction", "calendar", "sale", "scheduled")
        # Only the first 80 anchors are examined.
        for link in page.locator("a").all()[:80]:
            try:
                href = link.get_attribute("href") or ""
                text = (link.text_content() or "").strip()[:80]
            except Exception as exc:
                print(f" link error: {exc}")
                continue
            href_lower = href.lower()
            text_lower = text.lower()
            matches = (
                any(kw in href_lower for kw in href_keywords)
                or any(kw in text_lower for kw in text_keywords)
            )
            # Skip script pseudo-links and in-page anchors; they are not
            # navigable URLs.
            if matches and not href.startswith(("javascript:", "#")):
                print(f" href={href} | text='{text}'")

        # Save the rendered HTML. The path is relative to the current working
        # directory; create the folder first so a missing "scripts/" directory
        # does not crash the run after all the work above has been done.
        html = page.content()
        html_path = Path("scripts/_miamidade_preview_rendered.html")
        html_path.parent.mkdir(parents=True, exist_ok=True)
        html_path.write_text(html, encoding="utf-8")
        print(f"\nRendered HTML saved: {html_path.as_posix()} ({len(html):,} chars)")

        # Show key body text snippets
        print()
        print("--- BODY TEXT SNIPPETS (around 'auction' or 'calendar' keywords) ---")
        body_text = page.locator("body").inner_text()
        for kw in ["Auction Calendar", "Today's Auctions", "Upcoming Auctions", "View Auction", "Scheduled Sales", "Number of Auctions"]:
            # Case-sensitive search; only the first occurrence is shown, with
            # 50 characters of leading and up to 300 of trailing context.
            idx = body_text.find(kw)
            if idx >= 0:
                print(f" '{kw}' at pos {idx}: ...{body_text[max(0,idx-50):idx+300]!r}")
    finally:
        # Close the browser even when a step above raises.
        browser.close()
|