""" Usa el perfil real de Chrome (con sesión de Google) para scrapear Zillow y Realtor.com. Primero copia el perfil a un directorio temporal para no corromper el original. """ import re, time, random, shutil, os, json from playwright.sync_api import sync_playwright CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe" CHROME_PROFILE = r"C:\Users\aerom\AppData\Local\Google\Chrome\User Data" TEMP_PROFILE = r"C:\Temp\chrome_casa_hunter" def human_delay(a=1.5, b=3.5): time.sleep(random.uniform(a, b)) def slow_scroll(page, steps=4): for _ in range(steps): page.mouse.wheel(0, random.randint(300, 600)) time.sleep(random.uniform(0.4, 0.9)) def wait_for_content(page, selector, timeout=60): print(f" Esperando '{selector}' (max {timeout}s)...") start = time.time() while time.time() - start < timeout: try: items = page.query_selector_all(selector) if items: return items except Exception: pass time.sleep(2) return [] # Copiar perfil si no existe el temporal if not os.path.exists(TEMP_PROFILE): print("Copiando perfil de Chrome (solo Default)...") os.makedirs(TEMP_PROFILE, exist_ok=True) src_default = os.path.join(CHROME_PROFILE, "Default") dst_default = os.path.join(TEMP_PROFILE, "Default") # Solo copiar archivos de sesión, no caché (para ir más rápido) for item in ["Cookies", "Login Data", "Web Data", "Preferences"]: src = os.path.join(src_default, item) if os.path.exists(src): os.makedirs(dst_default, exist_ok=True) shutil.copy2(src, dst_default) print("Perfil copiado.") else: print("Usando perfil temporal existente.") print("\n=== Zillow con sesión de Google ===") with sync_playwright() as p: ctx = p.chromium.launch_persistent_context( user_data_dir=TEMP_PROFILE, executable_path=CHROME_PATH, headless=False, args=[ "--profile-directory=Default", "--disable-blink-features=AutomationControlled", "--start-maximized", "--no-first-run", "--no-default-browser-check", ], viewport={"width": 1366, "height": 768}, ) page = ctx.new_page() # Ir a Zillow directamente print("Navegando a Zillow...") try: page.goto( "https://www.zillow.com/homes/for_sale/vero-beach-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%7D%7D%7D", wait_until="load", timeout=45000 ) except Exception as e: print(f"Timeout en load (continuando): {e}") human_delay(2, 3) # Si hay captcha Cloudflare, esperar que el usuario lo resuelva print("Título actual:", page.title()[:60]) if "denied" in page.title().lower() or "verification" in page.title().lower(): print(">> Cloudflare challenge detectado. Resuélvelo en el browser (90s)...") time.sleep(90) slow_scroll(page, 5) human_delay(1, 2) # Buscar listings cards = wait_for_content(page, "[data-test='property-card']", timeout=30) print(f"\nListings Zillow: {len(cards)}") results = [] for card in cards[:10]: txt = card.inner_text() price_m = re.search(r'\$[\d,]+', txt) addr_lines = txt.strip().split('\n') price = price_m.group() if price_m else "?" addr = addr_lines[0] if addr_lines else "?" print(f" {price} | {addr[:60]}") results.append({"price": price, "address": addr}) if results: with open("zillow_results.json", "w") as f: json.dump(results, f, indent=2) print(f"\nGuardado en zillow_results.json ({len(results)} propiedades)") else: content = page.content() prices = re.findall(r'"unformattedPrice":\s*(\d+)', content) print("Precios en HTML:", prices[:5]) input("\n[Enter para ir a Realtor.com]") print("\n=== Realtor.com con sesión de Google ===") page.goto( "https://www.realtor.com/realestateandhomes-search/Vero-Beach_FL/price-na-230000", wait_until="load", timeout=45000 ) human_delay(2, 3) slow_scroll(page, 5) cards2 = wait_for_content(page, "[data-testid='property-card-content']", timeout=30) print(f"\nListings Realtor: {len(cards2)}") results2 = [] for card in cards2[:10]: txt = card.inner_text() price_m = re.search(r'\$[\d,]+', txt) lines = txt.strip().split('\n') price = price_m.group() if price_m else "?" addr = lines[0] if lines else "?" print(f" {price} | {addr[:60]}") results2.append({"price": price, "address": addr}) if results2: with open("realtor_results.json", "w") as f: json.dump(results2, f, indent=2) print(f"Guardado en realtor_results.json ({len(results2)} propiedades)") input("\n[Enter para cerrar el browser]") ctx.close()