"""Dump Results.aspx HTML to find owner/RE# patterns.""" from __future__ import annotations import io, sys, time sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") from playwright.sync_api import sync_playwright USER_AGENT = "AR-House/1.0 (real estate investment analysis)" with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context(user_agent=USER_AGENT) page = context.new_page() page.set_default_timeout(20_000) page.goto("https://paopropertysearch.coj.net/Basic/Search.aspx", wait_until="networkidle", timeout=20_000) page.locator("#ctl00_cphBody_tbStreetNumber").fill("3245") page.locator("#ctl00_cphBody_ddStreetPrefix").select_option(value="N") page.locator("#ctl00_cphBody_tbStreetName").fill("PEARL") page.locator("#ctl00_cphBody_ddStreetSuffix").select_option(value="ST") # Submit via form.submit page.evaluate("""() => { const form = document.forms[0] || document.querySelector('form'); form.action = 'Results.aspx'; let hidden = document.createElement('input'); hidden.type = 'hidden'; hidden.name = 'ctl00$cphBody$bSearch'; hidden.value = 'Search'; form.appendChild(hidden); form.submit(); }""") page.wait_for_url("**Results.aspx**", timeout=10_000) page.wait_for_load_state("networkidle", timeout=10_000) print("URL:", page.url) print("Title:", page.title()) print() # Buscar tablas tables = page.locator("table").all() print(f"Tables: {len(tables)}") for i, t in enumerate(tables): try: rows = t.locator("tr").all() print(f"\n--- Table [{i}] ({len(rows)} rows) ---") for j, r in enumerate(rows[:5]): cells = r.locator("td, th").all() texts = [(c.text_content() or "").strip()[:60] for c in cells] print(f" Row {j}: {texts}") except Exception as e: print(f" Error: {e}") # Dump full HTML for parsing html = page.content() with open("scripts/_duval_pa_results.html", "w", encoding="utf-8") as f: f.write(html) print(f"\nFull HTML: scripts/_duval_pa_results.html ({len(html):,} chars)") # Search for any text containing "Owner" or "RE" print("\n--- Text matches for 'Owner', 'RE Number', 'Year Built' ---") body_text = page.locator("body").inner_text() for kw in ["Owner", "RE Number", "RE #", "Year Built", "Just Value", "Assessed Value", "Last Sale"]: if kw in body_text: idx = body_text.find(kw) print(f" '{kw}' at pos {idx}: ...{body_text[max(0,idx-30):idx+100]}...") else: print(f" '{kw}': NOT FOUND") # Buscar que apunten a propiedades individuales links = page.locator("a").all() print(f"\n--- Links ({len(links)} total) ---") for i, l in enumerate(links): href = l.get_attribute("href") or "" if "Property" in href or "RE=" in href or "RealEstate" in href or "Detail" in href: print(f" [{i}] href={href} text='{(l.text_content() or '').strip()[:50]}'") if i > 50: break browser.close()