Files
Agente-Marketing/casa-hunter/test_playwright.py
T

94 lines
3.6 KiB
Python

import re, json, time
from playwright.sync_api import sync_playwright
def test_zillow():
    """Probe the Zillow Vero Beach for-sale page with headless Chromium.

    Prints the page title (first 80 chars), the size of the rendered HTML,
    the number of elements matching ``[data-test='property-card']`` and a
    150-character text preview of the first three. When no card matches,
    falls back to scanning the HTML for ``"unformattedPrice"`` values.

    Navigation/scraping errors are printed rather than raised, so a failure
    here does not stop the caller from running other probes.
    """
    print("=== Zillow con Playwright ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            # Presumably reduces automation fingerprinting -- verify against
            # the Chromium flag documentation.
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
                locale="en-US",
                viewport={"width": 1280, "height": 800},
            )
            # Make navigator.webdriver read as undefined on every page.
            ctx.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = ctx.new_page()
            try:
                page.goto(
                    "https://www.zillow.com/homes/for_sale/vero-beach-fl/",
                    wait_until="load", timeout=45000
                )
                # Give client-side rendering a moment. wait_for_timeout is
                # used instead of time.sleep so Playwright keeps processing
                # its internal events while waiting.
                page.wait_for_timeout(3000)
                print("Title:", page.title()[:80])
                content = page.content()
                print("Page size:", len(content))
                cards = page.query_selector_all("[data-test='property-card']")
                print(f"Property cards: {len(cards)}")
                for card in cards[:3]:
                    txt = card.inner_text()
                    print("  ", txt[:150].replace('\n', ' | '))
                if not cards:
                    # No card elements matched: look for prices in the raw HTML.
                    prices = re.findall(r'"unformattedPrice":\s*(\d+)', content)
                    print("Prices in HTML:", prices[:5])
            except Exception as e:
                # Best-effort diagnostic: report and carry on.
                print(f"ERROR: {e}")
        finally:
            # Close the browser on every exit path, including setup failures
            # and KeyboardInterrupt.
            browser.close()
def test_realtor():
    """Probe the Realtor.com Vero Beach search page with headless Chromium.

    Prints the page title (first 80 chars) and the number of elements
    matching ``[data-testid='property-card-content']``. When none match, it
    retries with a class-based selector and prints that count too. A
    150-character text preview of the first three cards is then printed.

    Navigation/scraping errors are printed rather than raised, so a failure
    here does not stop the caller from running other probes.
    """
    print("\n=== Realtor.com con Playwright ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            # Presumably reduces automation fingerprinting -- verify against
            # the Chromium flag documentation.
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
                locale="en-US",
            )
            # Make navigator.webdriver read as undefined on every page.
            ctx.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = ctx.new_page()
            try:
                page.goto(
                    "https://www.realtor.com/realestateandhomes-search/Vero-Beach_FL/price-na-230000",
                    wait_until="load", timeout=45000
                )
                # wait_for_timeout is used instead of time.sleep so Playwright
                # keeps processing its internal events while waiting.
                page.wait_for_timeout(3000)
                print("Title:", page.title()[:80])
                cards = page.query_selector_all("[data-testid='property-card-content']")
                print(f"Cards (testid): {len(cards)}")
                if not cards:
                    # Fallback selector when the data-testid hook is absent.
                    cards = page.query_selector_all(".jsx-1403264941, [class*='PropertyCard']")
                    print(f"Cards (class): {len(cards)}")
                for card in cards[:3]:
                    txt = card.inner_text()
                    print("  ", txt[:150].replace('\n', ' | '))
            except Exception as e:
                # Best-effort diagnostic: report and carry on.
                print(f"ERROR: {e}")
        finally:
            # Close the browser on every exit path, including setup failures
            # and KeyboardInterrupt.
            browser.close()
def test_new_construction():
    """Probe the NewHomeSource Vero Beach listing page with headless Chromium.

    Prints the page title (first 80 chars), the size of the rendered HTML
    and the first eight dollar-amount strings (``$`` followed by digits and
    commas) found anywhere in that HTML.

    Navigation/scraping errors are printed rather than raised, so a failure
    here does not stop the caller from running other probes.
    """
    print("\n=== NewHomeSource (casas nuevas) ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            try:
                page.goto(
                    "https://www.newhomesource.com/homes-for-sale/fl/vero-beach?priceMax=230000",
                    wait_until="load", timeout=30000
                )
                # wait_for_timeout is used instead of time.sleep so Playwright
                # keeps processing its internal events while waiting.
                page.wait_for_timeout(2000)
                content = page.content()
                print("Title:", page.title()[:80])
                print("Size:", len(content))
                # Crude scan: any "$1,234"-style token in the HTML counts.
                prices = re.findall(r'\$[\d,]+', content)
                print("Prices:", prices[:8])
            except Exception as e:
                # Best-effort diagnostic: report and carry on.
                print(f"ERROR: {e}")
        finally:
            # Close the browser on every exit path, including setup failures
            # and KeyboardInterrupt.
            browser.close()
if __name__ == "__main__":
    # Run the probes only when executed as a script. The file name matches
    # pytest's default test_*.py pattern, so a bare import (e.g. during test
    # collection) must not launch browsers or hit the network.
    test_zillow()
    test_realtor()
    test_new_construction()