""" Scraper Zillow: usa el search box como humano + extrae JSON de la página. """ import re, time, random, json from playwright.sync_api import sync_playwright CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe" TEMP_PROFILE = r"C:\Temp\chrome_casa_hunter" def hd(a=1.0, b=2.5): time.sleep(random.uniform(a, b)) def scroll(page, steps=4): for _ in range(steps): page.mouse.wheel(0, random.randint(250, 550)) time.sleep(random.uniform(0.4, 0.9)) def parse_listings(html, min_p=40000, max_p=230000): results = [] m = re.search(r']*id="__NEXT_DATA__"[^>]*>(.*?)', html, re.DOTALL) if m: try: data = json.loads(m.group(1)) list_results = (data.get("props",{}).get("pageProps",{}) .get("searchPageState",{}).get("cat1",{}) .get("searchResults",{}).get("listResults",[])) for p in list_results: price = p.get("unformattedPrice", 0) if min_p <= price <= max_p: city = p.get("addressCity", "") state = p.get("addressState", "") results.append({ "source": "zillow", "address": p.get("address","?"), "price": price, "beds": p.get("beds", 0), "baths": p.get("baths", 0), "sqft": p.get("area", 0), "city": city, "state": state, "zip": p.get("addressZipcode",""), "status": p.get("statusType",""), "url": "https://www.zillow.com" + p.get("detailUrl",""), "img": p.get("imgSrc",""), "type": p.get("hdpData",{}).get("homeInfo",{}).get("homeType",""), }) except Exception as e: print(f" Parse error: {e}") return results def search_city(page, city_query, max_price=230000): """Busca una ciudad en Zillow usando el search box.""" print(f"\n--- Buscando: {city_query} ---") try: # Ir a zillow.com page.goto("https://www.zillow.com", wait_until="load", timeout=30000) hd(1.5, 2.5) # Encontrar el search box y escribir la ciudad search_box = page.query_selector("input[id*='search'], input[placeholder*='address'], input[placeholder*='city']") if not search_box: # Probar selectores alternativos search_box = page.query_selector("#search-box-input, [data-testid='search-input'], input[name='searchQueryState']") if search_box: search_box.click() hd(0.3, 0.6) page.keyboard.down("Control") page.keyboard.press("a") page.keyboard.up("Control") hd(0.2, 0.4) page.keyboard.press("Delete") hd(0.3, 0.5) # Escribir como humano, caracter por caracter for char in city_query: page.keyboard.type(char) time.sleep(random.uniform(0.07, 0.18)) hd(1.0, 1.8) page.keyboard.press("Enter") page.wait_for_load_state("load", timeout=30000) hd(2, 3) scroll(page, 4) hd(1, 2) else: # Si no encuentra search box, usar URL directamente city_slug = city_query.lower().replace(" ", "-").replace(",", "") url = f"https://www.zillow.com/homes/for_sale/{city_slug}/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A{max_price}%2C%22min%22%3A40000%7D%7D%7D" page.goto(url, wait_until="load", timeout=45000) hd(2, 3) scroll(page, 4) title = page.title() html = page.content() print(f" Título: {title[:60]}") listings = parse_listings(html) print(f" Encontrados: {len(listings)} en rango $40K-$230K") for l in listings[:3]: print(f" ${l['price']:,} | {l.get('beds','?')}bd | {l['address'][:50]}, {l['city']}") return listings except Exception as e: print(f" ERROR: {e}") return [] # Ciudades objetivo CITIES = [ "Vero Beach, FL", "Melbourne, FL", "Jacksonville, FL", "Stuart, FL", "Daytona Beach, FL", "St. Augustine, FL", "Palm Coast, FL", "New Smyrna Beach, FL", ] all_results = [] with sync_playwright() as p: ctx = p.chromium.launch_persistent_context( user_data_dir=TEMP_PROFILE, executable_path=CHROME_PATH, headless=False, args=[ "--profile-directory=Default", "--disable-blink-features=AutomationControlled", "--start-maximized", "--no-first-run", "--no-default-browser-check", ], viewport={"width": 1366, "height": 768}, ) page = ctx.new_page() for city in CITIES: listings = search_city(page, city) all_results.extend(listings) hd(2, 4) # pausa entre ciudades ctx.close() # Deduplicar por dirección seen = set() unique = [] for r in all_results: key = r["address"].lower().strip() if key not in seen: seen.add(key) unique.append(r) print(f"\n=== TOTAL: {len(unique)} listings únicos en $40K-$230K ===") with open("zillow_final.json", "w", encoding="utf-8") as f: json.dump(unique, f, indent=2, ensure_ascii=False) print("Guardado en zillow_final.json")