# Files
#
# 83 lines
# 2.8 KiB
# Python

import re, time, random
from playwright.sync_api import sync_playwright
from playwright_stealth import Stealth
# Path to the Google Chrome executable (a Windows "Program Files" location)
# that is handed to Playwright's launch() as executable_path further down.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
def human_delay(a: float = 1.5, b: float = 4.0) -> None:
    """Pause for a random duration drawn uniformly between *a* and *b* seconds.

    The script calls this between browser actions so that the gaps between
    them are irregular rather than fixed.

    Args:
        a: One bound of the pause, in seconds.
        b: The other bound of the pause, in seconds.
    """
    # time.sleep() raises ValueError on a negative argument, so clamp the
    # drawn value at zero instead of crashing on a negative bound.
    time.sleep(max(0.0, random.uniform(a, b)))
def slow_scroll(page, steps: int = 5) -> None:
    """Scroll *page* downwards in several randomly sized mouse-wheel steps.

    Args:
        page: Object exposing ``mouse.wheel(delta_x, delta_y)``; the callers
            in this file pass a Playwright sync ``Page``.
        steps: Number of wheel events to send. Zero or a negative count
            sends none and returns immediately.
    """
    for _ in range(steps):
        # Vertical-only wheel event with a random delta of 250-600.
        page.mouse.wheel(0, random.randint(250, 600))
        # Short random pause after every step so the scroll is not instant.
        time.sleep(random.uniform(0.4, 1.0))
# --- Zillow: load the Vero Beach for-sale search and report what is visible ---
print("=== Zillow con Chrome real + Stealth ===")
with sync_playwright() as p:
    # Launch the Chrome binary at CHROME_PATH in headed mode.
    browser = p.chromium.launch(
        executable_path=CHROME_PATH,
        headless=False,
        args=[
            "--disable-blink-features=AutomationControlled",
            "--start-maximized",
            "--no-first-run",
            "--no-default-browser-check",
        ],
    )
    # try/finally so the browser is closed even if navigation times out or a
    # later call raises.
    try:
        ctx = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            locale="en-US",
            timezone_id="America/New_York",
            viewport={"width": 1366, "height": 768},
        )
        page = ctx.new_page()
        # Stealth patches are applied to the page before the first navigation.
        Stealth().apply_stealth_sync(page)
        page.goto(
            "https://www.zillow.com/homes/for_sale/vero-beach-fl/",
            wait_until="load",
            timeout=45000,
        )

        # Pause, scroll, pause again before reading the page.
        human_delay(2, 4)
        slow_scroll(page, 5)
        human_delay(1, 2)

        print("Title:", page.title()[:80])
        cards = page.query_selector_all("[data-test='property-card']")
        print(f"Cards: {len(cards)}")
        # Preview of the first five cards, flattened onto one line each.
        for card in cards[:5]:
            print(" ", card.inner_text()[:130].replace('\n', ' | '))

        if not cards:
            # No card matched the selector: look for listing fields in the raw
            # HTML instead and check for the "access denied" message.
            content = page.content()
            prices = re.findall(r'"unformattedPrice":\s*(\d+)', content)
            zpids = re.findall(r'"zpid":\s*(\d+)', content)
            print("Prices:", prices[:5])
            print("ZPIDs:", zpids[:5])
            print("Blocked?", "Access to this page has been denied" in content)
    finally:
        browser.close()
# --- Realtor.com: load the Vero Beach search and report what is visible ---
print("\n=== Realtor.com con Chrome real ===")
with sync_playwright() as p:
    # Launch the Chrome binary at CHROME_PATH in headed mode.
    browser = p.chromium.launch(
        executable_path=CHROME_PATH,
        headless=False,
        args=["--disable-blink-features=AutomationControlled", "--start-maximized"],
    )
    # try/finally so the browser is closed even if navigation times out or a
    # later call raises.
    try:
        ctx = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = ctx.new_page()
        # Stealth patches are applied to the page before the first navigation.
        Stealth().apply_stealth_sync(page)
        page.goto(
            "https://www.realtor.com/realestateandhomes-search/Vero-Beach_FL/price-na-230000",
            wait_until="load",
            timeout=45000,
        )

        # Pause and scroll before reading the page.
        human_delay(2, 4)
        slow_scroll(page, 5)

        print("Title:", page.title()[:80])
        cards = page.query_selector_all("[data-testid='property-card-content']")
        print(f"Cards: {len(cards)}")
        # Preview of the first three cards, flattened onto one line each.
        for card in cards[:3]:
            print(" ", card.inner_text()[:130].replace('\n', ' | '))
    finally:
        browser.close()