94 lines
3.6 KiB
Python
94 lines
3.6 KiB
Python
import re, json, time
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
def test_zillow():
    """Probe the Zillow for-sale listing page for Vero Beach, FL.

    Launches headless Chromium, loads the search page and prints a short
    diagnostic report to stdout: the page title, the size of the rendered
    HTML, the number of elements matching ``[data-test='property-card']``
    and a text preview of the first three of them.  When no card element
    is found, it falls back to scanning the raw HTML for
    ``"unformattedPrice"`` values.

    Errors raised while navigating or reading the page are printed rather
    than propagated, so a failing probe does not abort the script.
    Returns ``None``.
    """
    print("=== Zillow con Playwright ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            # Presumably meant to make the automated browser look less
            # like a bot to the target site.
            args=["--disable-blink-features=AutomationControlled"],
        )
        # try/finally guarantees the browser is closed even when context
        # or page creation fails, which happens before the inner try.
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
                locale="en-US",
                viewport={"width": 1280, "height": 800},
            )
            # Make navigator.webdriver report undefined on every page.
            ctx.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = ctx.new_page()
            try:
                page.goto(
                    "https://www.zillow.com/homes/for_sale/vero-beach-fl/",
                    wait_until="load",
                    timeout=45000,
                )
                # Fixed settle delay after load.  wait_for_timeout is used
                # instead of time.sleep because the Playwright docs advise
                # against time.sleep with the sync API.
                page.wait_for_timeout(3000)
                print("Title:", page.title()[:80])
                content = page.content()
                print("Page size:", len(content))
                cards = page.query_selector_all("[data-test='property-card']")
                print(f"Property cards: {len(cards)}")
                for card in cards[:3]:
                    txt = card.inner_text()
                    print(" ", txt[:150].replace('\n', ' | '))
                if not cards:
                    # No card elements matched: look for prices in the
                    # raw HTML instead.
                    prices = re.findall(r'"unformattedPrice":\s*(\d+)', content)
                    print("Prices in HTML:", prices[:5])
            except Exception as e:
                # Deliberate best-effort: report the failure and move on.
                print(f"ERROR: {e}")
        finally:
            browser.close()
|
|
|
|
def test_realtor():
    """Probe the Realtor.com search page for Vero Beach, FL.

    Launches headless Chromium, loads the search URL and prints a short
    diagnostic report to stdout: the page title, the number of elements
    matching ``[data-testid='property-card-content']`` and a text preview
    of the first three cards.  When that selector matches nothing, a
    class-based selector is tried instead.

    Errors raised while navigating or reading the page are printed rather
    than propagated, so a failing probe does not abort the script.
    Returns ``None``.
    """
    print("\n=== Realtor.com con Playwright ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            # Presumably meant to make the automated browser look less
            # like a bot to the target site.
            args=["--disable-blink-features=AutomationControlled"],
        )
        # try/finally guarantees the browser is closed even when context
        # or page creation fails, which happens before the inner try.
        try:
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
                locale="en-US",
            )
            # Make navigator.webdriver report undefined on every page.
            ctx.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            page = ctx.new_page()
            try:
                page.goto(
                    "https://www.realtor.com/realestateandhomes-search/Vero-Beach_FL/price-na-230000",
                    wait_until="load",
                    timeout=45000,
                )
                # Fixed settle delay after load.  wait_for_timeout is used
                # instead of time.sleep because the Playwright docs advise
                # against time.sleep with the sync API.
                page.wait_for_timeout(3000)
                print("Title:", page.title()[:80])
                cards = page.query_selector_all("[data-testid='property-card-content']")
                print(f"Cards (testid): {len(cards)}")
                if not cards:
                    # The test-id selector matched nothing: fall back to
                    # class-name based selectors.
                    cards = page.query_selector_all(".jsx-1403264941, [class*='PropertyCard']")
                    print(f"Cards (class): {len(cards)}")
                for card in cards[:3]:
                    txt = card.inner_text()
                    print(" ", txt[:150].replace('\n', ' | '))
            except Exception as e:
                # Deliberate best-effort: report the failure and move on.
                print(f"ERROR: {e}")
        finally:
            browser.close()
|
|
|
|
def test_new_construction():
    """Probe the NewHomeSource listing page for Vero Beach, FL.

    Launches headless Chromium with default settings, loads the search
    URL and prints a short diagnostic report to stdout: the page title,
    the size of the rendered HTML and the first eight dollar amounts
    found in it.

    Errors raised while navigating or reading the page are printed rather
    than propagated, so a failing probe does not abort the script.
    Returns ``None``.
    """
    print("\n=== NewHomeSource (casas nuevas) ===")
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # try/finally guarantees the browser is closed even when page
        # creation fails, which happens before the inner try.
        try:
            page = browser.new_page()
            try:
                page.goto(
                    "https://www.newhomesource.com/homes-for-sale/fl/vero-beach?priceMax=230000",
                    wait_until="load",
                    timeout=30000,
                )
                # Fixed settle delay after load.  wait_for_timeout is used
                # instead of time.sleep because the Playwright docs advise
                # against time.sleep with the sync API.
                page.wait_for_timeout(2000)
                content = page.content()
                print("Title:", page.title()[:80])
                print("Size:", len(content))
                # Any "$" followed by digits/commas anywhere in the HTML.
                prices = re.findall(r'\$[\d,]+', content)
                print("Prices:", prices[:8])
            except Exception as e:
                # Deliberate best-effort: report the failure and move on.
                print(f"ERROR: {e}")
        finally:
            browser.close()
|
|
|
|
# Run each site probe once, in order: Zillow, Realtor.com, NewHomeSource.
for _probe in (test_zillow, test_realtor, test_new_construction):
    _probe()
|