"""Find the deep-link URL pattern for HUD property detail pages."""
from __future__ import annotations
import io, sys, time, re
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

from playwright.sync_api import sync_playwright

# User-agent string for desktop Chrome 131 on 64-bit Windows 10; it is passed
# as the ``user_agent`` of the browser context created further down.
# NOTE(review): presumably used so the headless browser is served the same
# markup as a regular desktop browser -- confirm.
REAL_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
           "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")

# Discovery script: open the HUD Home Store search results for Florida, dump
# the markup / links / click-related attributes of the first property card,
# then scan the whole page source for anything that looks like a property
# detail URL or carries a case number.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        page = browser.new_context(
            user_agent=REAL_UA, viewport={"width": 1400, "height": 900},
        ).new_page()
        page.set_default_timeout(30_000)

        # Visit the home page first, then the search results. The fixed sleeps
        # add extra settling time on top of "networkidle".
        # NOTE(review): presumably needed for client-side rendering of the
        # cards -- confirm whether a selector wait could replace them.
        page.goto("https://www.hudhomestore.gov/", wait_until="networkidle")
        time.sleep(2)
        page.goto("https://www.hudhomestore.gov/searchresult?citystate=FL", wait_until="networkidle")
        time.sleep(6)

        # Find all property cards on the results page.
        print("=== ALL <a> elements inside property cards ===")
        cards = page.locator("div.topMap-card.card-body").all()
        print(f"Found {len(cards)} cards")

        # Inspect the FIRST card in detail.
        if cards:
            first_card = cards[0]
            preview_len = 2500  # single source of truth for slice and label

            print()
            print(f"--- CARD #1 HTML structure (first {preview_len} chars) ---")
            html = first_card.evaluate("(el) => el.outerHTML")
            # Collapse whitespace runs so the markup prints compactly, then
            # truncate to the preview length announced in the header.
            print(re.sub(r"\s+", " ", html)[:preview_len])

            print()
            print("--- CARD #1 ALL <a> hrefs ---")
            anchors = first_card.locator("a").all()
            for i, a in enumerate(anchors[:15]):
                href = a.get_attribute("href") or ""
                text = (a.text_content() or "").strip()[:60]
                print(f"  [{i}] href={href} | text='{text}'")

            # Also look for onclick handlers + data attributes.
            print()
            print("--- CARD #1 elements with onclick / data-* ---")
            clickables = first_card.locator(
                "[onclick], [data-href], [data-url], [data-link], [data-property]"
            ).all()
            for el in clickables[:10]:
                # One round trip per element: fetch the tag name and every
                # attribute together instead of two separate evaluations.
                tag, attrs = el.evaluate("""(el) => {
                    const out = {};
                    for (const a of el.attributes) out[a.name] = a.value;
                    return [el.tagName, out];
                }""")
                print(f"  {tag}: {attrs}")

        # Check if there's a global pattern for property detail URLs in the page.
        print()
        print("=== Looking for '/propertydetails' / '/Listing' anywhere in page ===")
        full_html = page.content()
        urls = re.findall(
            r'href="([^"]*(?:propertydetail|propertyDetail|listing/PropertyDetail|case[Nn]umber)[^"]*)"',
            full_html, re.IGNORECASE,
        )
        # De-duplicate BEFORE truncating (keeping first-seen order) so that up
        # to ten distinct URLs are shown, in a reproducible order.
        for u in list(dict.fromkeys(urls))[:10]:
            print(f"  {u}")

        # Also look for href / data-* attributes whose value contains a case
        # number of the form 093-XXXXXX (hyphen optional). The attribute-name
        # pattern allows digits and hyphens so that multi-word names such as
        # data-case-number are matched as well.
        case_links = re.findall(
            r'(href|data-[a-z0-9-]+)="([^"]*093-?\d{6}[^"]*)"',
            full_html, re.IGNORECASE,
        )
        print("\nLinks containing a case number (093-XXXXXX):")
        for attr, url in case_links[:8]:
            print(f"  {attr}={url}")
    finally:
        # Close the browser even if navigation or inspection raised.
        browser.close()