feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
"""Probe Hillsborough PA structure for one real parcel."""
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
|
||||
def probe():
    """Probe candidate Hillsborough PA URLs for one hard-coded parcel.

    Launches headless Chromium through Playwright and visits each candidate
    URL in order, printing the length of the rendered ``<body>`` text. The
    first page whose text mentions both "owner" and "year" is treated as the
    property-detail page: an ASCII-safe preview is printed, and the page HTML
    plus a full-page screenshot are written to ``_probe_out/hcpa`` (two
    directory levels above this file) as ``01_detail.html`` and
    ``01_detail.png``. Probing stops at the first hit.

    A URL that fails to load (timeout or navigation error) is reported and
    skipped so the remaining candidates are still tried.

    Returns:
        None. Results are reported on stdout and as files in the output
        directory.
    """
    # Imported lazily so the module can be imported without Playwright.
    from playwright.sync_api import Error as PlaywrightError
    from playwright.sync_api import sync_playwright

    out_dir = Path(__file__).parent.parent / "_probe_out" / "hcpa"
    out_dir.mkdir(parents=True, exist_ok=True)
    parcel = "1932071V5000000002960U"  # 609 NW 1ST AVE

    # Try deep link with folio
    urls_to_try = [
        f"https://gis.hcpafl.org/propertysearch/#/nav/Folio/{parcel}",
        f"https://gis.hcpafl.org/propertysearch/#/Map/Folio/{parcel}",
        f"https://www.hcpafl.org/property/{parcel}",
    ]

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_context(
                user_agent="Mozilla/5.0 Chrome/131"
            ).new_page()

            for url in urls_to_try:
                try:
                    page.goto(url, wait_until="domcontentloaded", timeout=20000)
                except PlaywrightError as exc:
                    # One dead candidate must not stop the remaining probes.
                    print(f"[{url}] navigation failed: {exc}")
                    continue

                # Fixed pause, presumably to let client-side rendering finish.
                # wait_for_timeout is the Playwright-recommended replacement
                # for time.sleep() when using the sync API.
                page.wait_for_timeout(7000)

                body = page.inner_text("body")
                print(f"[{url}] body len: {len(body)}")

                lowered = body.lower()
                # "year" also matches "year built", so a single check suffices.
                if "owner" in lowered and "year" in lowered:
                    print(" HIT! Showing property details")
                    # Non-ASCII characters are replaced with "?" one-for-one,
                    # presumably so the preview prints on any console encoding.
                    snippet = body[:1200].encode("ascii", "replace").decode("ascii")
                    print(f" Body preview: {snippet}")
                    (out_dir / "01_detail.html").write_text(
                        page.content(), encoding="utf-8"
                    )
                    page.screenshot(
                        path=str(out_dir / "01_detail.png"), full_page=True
                    )
                    break
        finally:
            # Close the browser even if a probe step raised.
            browser.close()
|
||||
|
||||
|
||||
# Run the probe only when executed as a script, not when imported.
if __name__ == "__main__":
    probe()
|
||||
Reference in New Issue
Block a user