# Python script, 83 lines, 2.8 KiB (file-viewer header, kept as a comment).
import re, time, random
|
|
from playwright.sync_api import sync_playwright
|
|
from playwright_stealth import Stealth
|
|
|
|
# Filesystem path of the Chrome executable; passed as `executable_path`
# to every `p.chromium.launch(...)` call in this script.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
|
|
|
|
def human_delay(a=1.5, b=4.0):
    """Sleep for a random duration drawn uniformly between *a* and *b* seconds.

    Args:
        a: One bound of the pause length, in seconds.
        b: The other bound of the pause length, in seconds.
    """
    pause_seconds = random.uniform(a, b)
    time.sleep(pause_seconds)
|
|
|
|
def slow_scroll(page, steps=5, *, delta_range=(250, 600), pause_range=(0.4, 1.0)):
    """Scroll *page* downward in several randomized mouse-wheel steps.

    Each step sends one vertical wheel event of a random integer size and
    then sleeps for a random interval before the next step.

    Args:
        page: Object exposing ``page.mouse.wheel(delta_x, delta_y)``.
        steps: Number of wheel events to send; zero or less sends none.
        delta_range: Inclusive ``(min, max)`` integer bounds for the vertical
            wheel delta of each step.
        pause_range: ``(min, max)`` bounds, in seconds, for the sleep that
            follows each step.
    """
    min_delta, max_delta = delta_range
    min_pause, max_pause = pause_range
    for _ in range(steps):
        # Horizontal delta stays 0: only vertical scrolling is simulated.
        page.mouse.wheel(0, random.randint(min_delta, max_delta))
        time.sleep(random.uniform(min_pause, max_pause))
|
|
|
|
# --- Zillow probe: open the Vero Beach for-sale search page and report what loads. ---
print("=== Zillow con Chrome real + Stealth ===")
with sync_playwright() as p:
    # Launch the Chrome binary at CHROME_PATH in headed mode.
    browser = p.chromium.launch(
        executable_path=CHROME_PATH,
        headless=False,
        args=[
            "--disable-blink-features=AutomationControlled",
            "--start-maximized",
            "--no-first-run",
            "--no-default-browser-check",
        ],
    )
    try:
        ctx = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            locale="en-US",
            timezone_id="America/New_York",
            viewport={"width": 1366, "height": 768},
        )
        page = ctx.new_page()
        Stealth().apply_stealth_sync(page)

        page.goto("https://www.zillow.com/homes/for_sale/vero-beach-fl/", wait_until="load", timeout=45000)
        # Pause and scroll in randomized steps before reading the page.
        human_delay(2, 4)
        slow_scroll(page, 5)
        human_delay(1, 2)

        print("Title:", page.title()[:80])
        cards = page.query_selector_all("[data-test='property-card']")
        print(f"Cards: {len(cards)}")
        # Show a one-line preview of the first few cards.
        for card in cards[:5]:
            print(" ", card.inner_text()[:130].replace('\n', ' | '))

        if not cards:
            # No card elements matched: look in the raw HTML for price / zpid
            # values and for the access-denied message instead.
            content = page.content()
            prices = re.findall(r'"unformattedPrice":\s*(\d+)', content)
            zpids = re.findall(r'"zpid":\s*(\d+)', content)
            print("Prices:", prices[:5])
            print("ZPIDs:", zpids[:5])
            print("Blocked?", "Access to this page has been denied" in content)
    finally:
        # Always close the browser, even if navigation or scraping raised
        # (e.g. the 45 s goto timeout).
        browser.close()
|
|
|
|
# --- Realtor.com probe: open the Vero Beach search page and report what loads. ---
print("\n=== Realtor.com con Chrome real ===")
with sync_playwright() as p:
    # Launch the Chrome binary at CHROME_PATH in headed mode.
    browser = p.chromium.launch(
        executable_path=CHROME_PATH,
        headless=False,
        args=["--disable-blink-features=AutomationControlled", "--start-maximized"],
    )
    try:
        ctx = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            locale="en-US",
            timezone_id="America/New_York",
        )
        page = ctx.new_page()
        Stealth().apply_stealth_sync(page)

        page.goto("https://www.realtor.com/realestateandhomes-search/Vero-Beach_FL/price-na-230000", wait_until="load", timeout=45000)
        # Pause and scroll in randomized steps before reading the page.
        human_delay(2, 4)
        slow_scroll(page, 5)

        print("Title:", page.title()[:80])
        cards = page.query_selector_all("[data-testid='property-card-content']")
        print(f"Cards: {len(cards)}")
        # Show a one-line preview of the first few cards.
        for card in cards[:3]:
            print(" ", card.inner_text()[:130].replace('\n', ' | '))
    finally:
        # Always close the browser, even if navigation or scraping raised
        # (e.g. the 45 s goto timeout).
        browser.close()
|