64 lines
2.3 KiB
Python
64 lines
2.3 KiB
Python
"""
Opens a visible browser window and waits for the user to solve any CAPTCHA.

The script continues on its own once it detects listings on the page.
"""
|
|
import re, time, random
|
|
from playwright.sync_api import sync_playwright
|
|
from playwright_stealth import Stealth
|
|
|
|
# Absolute, Windows-style path to the Chrome executable; passed as
# `executable_path` when the browser is launched further down in this script.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
|
|
|
|
def slow_scroll(page, steps=4, *, min_delay=0.5, max_delay=1.2):
    """Scroll the page down in several randomized mouse-wheel movements.

    Args:
        page: Object exposing ``mouse.wheel(delta_x, delta_y)``
            (presumably a Playwright ``Page`` — confirm against callers).
        steps: Number of wheel movements to perform.
        min_delay: Lower bound, in seconds, of the random pause after each step.
        max_delay: Upper bound, in seconds, of the random pause after each step.

    Returns:
        None.
    """
    for _ in range(steps):
        # Vertical-only scroll by a random amount between 300 and 600.
        page.mouse.wheel(0, random.randint(300, 600))
        # Random pause so consecutive scrolls are not evenly spaced.
        time.sleep(random.uniform(min_delay, max_delay))
|
|
|
|
def wait_for_listings(page, selectors, timeout=90, poll_interval=2):
    """Poll the page until one of the selectors matches or the timeout expires.

    Args:
        page: Object exposing ``query_selector_all(selector)``
            (presumably a Playwright ``Page`` — confirm against callers).
        selectors: Iterable of selectors, tried in order on every poll.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to wait between polls.

    Returns:
        The first non-empty result returned by ``query_selector_all``, or an
        empty list if nothing matched before the timeout elapsed.
    """
    print(f"Esperando listings (max {timeout}s)... resuelve cualquier captcha si aparece")
    # Use the monotonic clock: wall-clock adjustments during a long manual
    # wait must not shorten or extend the deadline.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        for sel in selectors:
            cards = page.query_selector_all(sel)
            if cards:
                return cards
        # Never sleep past the deadline, so the call honours `timeout`.
        remaining = deadline - time.monotonic()
        time.sleep(max(0.0, min(poll_interval, remaining)))
    return []
|
|
|
|
# Script body: open Zillow in a visible Chrome window, wait for listings to
# appear (leaving time for a human to solve any CAPTCHA), then print a summary.
print("=== Zillow - espera manual ===")
with sync_playwright() as p:
    # Headed browser (headless=False) so the user can interact with the page.
    browser = p.chromium.launch(
        executable_path=CHROME_PATH,
        headless=False,
        args=["--disable-blink-features=AutomationControlled", "--start-maximized"],
    )
    try:
        ctx = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            locale="en-US",
            timezone_id="America/New_York",
            viewport={"width": 1366, "height": 768},
        )
        page = ctx.new_page()
        Stealth().apply_stealth_sync(page)

        print("Navegando a Zillow...")
        page.goto("https://www.zillow.com/homes/for_sale/vero-beach-fl/", wait_until="load", timeout=45000)

        # Wait up to 90 seconds so the user can solve a CAPTCHA if one appears.
        cards = wait_for_listings(page, ["[data-test='property-card']"], timeout=90)

        print(f"\nListings encontrados: {len(cards)}")
        # Preview the first five cards, flattening each card's text to one line.
        for card in cards[:5]:
            print(" ", card.inner_text()[:130].replace('\n', ' | '))

        if not cards:
            # No cards matched: dump diagnostics from the raw page source.
            content = page.content()
            prices = re.findall(r'"unformattedPrice":\s*(\d+)', content)
            print("Prices in JSON:", prices[:5])
            blocked = "Access to this page has been denied" in content or "cf-browser-verification" in content
            print("Bloqueado:", blocked)
            print("Título:", page.title())

        # Keep the window open until the user confirms.
        input("\nPresiona Enter para cerrar el browser...")
    finally:
        # Close the browser even if navigation or scraping raised.
        browser.close()
|