feat: Agente-Marketing initial commit
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
Zillow scraper that uses the real Chrome profile.
Extracts the complete listing data from the JSON embedded in the page.
|
||||
"""
|
||||
import re, time, random, json, os
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Path to the Chrome executable; passed to Playwright as `executable_path`
# when the persistent browser context is launched.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
# Directory handed to Playwright as `user_data_dir` for the persistent
# browser context.
TEMP_PROFILE = r"C:\Temp\chrome_casa_hunter"
|
||||
|
||||
def human_delay(a=1.5, b=3.5):
    """Pause for a random interval to mimic human pacing.

    Args:
        a: One bound of the delay range, in seconds.
        b: The other bound of the delay range, in seconds.

    The duration is drawn uniformly between ``a`` and ``b``. A negative
    draw is clamped to zero because ``time.sleep`` rejects negative
    durations with ``ValueError``.
    """
    time.sleep(max(0.0, random.uniform(a, b)))
|
||||
|
||||
def slow_scroll(page, steps=4, *, min_px=300, max_px=600,
                min_pause=0.5, max_pause=1.0):
    """Scroll the page down in several randomised wheel steps.

    Args:
        page: Object exposing ``mouse.wheel(delta_x, delta_y)``; the script
            passes a Playwright page.
        steps: Number of wheel events to send.
        min_px: Smallest vertical scroll distance per step (inclusive).
        max_px: Largest vertical scroll distance per step (inclusive).
        min_pause: One bound of the pause after each step, in seconds.
        max_pause: The other bound of the pause after each step, in seconds.

    The keyword-only parameters default to the values that were previously
    hard-coded, so existing ``slow_scroll(page, n)`` calls behave as before.
    """
    for _ in range(steps):
        page.mouse.wheel(0, random.randint(min_px, max_px))
        # Clamp so a negative pause range cannot make time.sleep raise.
        time.sleep(max(0.0, random.uniform(min_pause, max_pause)))
|
||||
|
||||
def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _next_data_list_results(html):
    """Return the ``listResults`` list from the page's __NEXT_DATA__ JSON.

    Returns an empty list when the script tag is missing, the JSON is
    invalid, or any level of the expected nesting is absent or malformed.
    """
    match = re.search(
        r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL
    )
    if not match:
        return []
    try:
        node = json.loads(match.group(1))
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"Error parsing __NEXT_DATA__: {e}")
        return []

    # Walk props -> pageProps -> searchPageState -> cat1 -> searchResults,
    # tolerating missing or non-dict levels instead of raising.
    for key in ("props", "pageProps", "searchPageState", "cat1", "searchResults"):
        node = _as_dict(node).get(key, {})
    list_results = _as_dict(node).get("listResults", [])
    return list_results if isinstance(list_results, list) else []


def parse_zillow_listings(html, max_price=230000, min_price=40000):
    """Extract listings from the JSON embedded in a Zillow search page.

    Args:
        html: Full HTML source of the page.
        max_price: Upper price bound (inclusive).
        min_price: Lower price bound (inclusive).

    Returns:
        A list of dicts, one per listing whose price lies within
        ``[min_price, max_price]``. Listings read from the ``__NEXT_DATA__``
        JSON carry the keys ``source``, ``address``, ``price``, ``beds``,
        ``baths``, ``sqft``, ``city``, ``state``, ``zip``, ``status``,
        ``url``, ``img`` and ``property_type``. If that yields nothing, a
        regex fallback produces dicts with only ``source``, ``price`` and
        ``address``.
    """
    base_url = "https://www.zillow.com"
    results = []

    for prop in _next_data_list_results(html):
        if not isinstance(prop, dict):
            continue
        price = prop.get("unformattedPrice", 0)
        # Skip a malformed entry on its own; one null or non-numeric price
        # must not abort processing of the remaining listings.
        if isinstance(price, bool) or not isinstance(price, (int, float)):
            continue
        if not (min_price <= price <= max_price):
            continue

        detail_url = prop.get("detailUrl", "")
        if not isinstance(detail_url, str):
            detail_url = ""
        # detailUrl may already be absolute; prefixing it unconditionally
        # would produce a malformed link.
        if detail_url.startswith(("http://", "https://")):
            url = detail_url
        else:
            url = base_url + detail_url

        home_info = _as_dict(_as_dict(prop.get("hdpData")).get("homeInfo"))

        results.append({
            "source": "zillow",
            "address": prop.get("address", "?"),
            "price": price,
            "beds": prop.get("beds", 0),
            "baths": prop.get("baths", 0),
            "sqft": prop.get("area", 0),
            "city": prop.get("addressCity", ""),
            "state": prop.get("addressState", ""),
            "zip": prop.get("addressZipcode", ""),
            "status": prop.get("statusType", ""),
            "url": url,
            "img": prop.get("imgSrc", ""),
            "property_type": home_info.get("homeType", ""),
        })

    if not results:
        # Fallback: scan the raw HTML for price and address fields.
        # NOTE: the two lists are matched purely by position, so a pairing
        # is only a best guess when the page contains other "address" keys.
        raw_prices = re.findall(r'"unformattedPrice":\s*(\d+)', html)
        raw_addrs = re.findall(r'"address":\s*"([^"]+)"', html)
        for raw_price, addr in zip(raw_prices, raw_addrs):
            price = int(raw_price)
            if min_price <= price <= max_price:
                results.append({"source": "zillow", "price": price, "address": addr})

    return results
|
||||
|
||||
# (label, search URL) pairs to scrape. Each URL embeds a price filter of
# 40000-230000, matching the default bounds of parse_zillow_listings.
cities_fl = [
    ("Vero Beach FL", "https://www.zillow.com/homes/for_sale/vero-beach-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%2C%22min%22%3A40000%7D%7D%7D"),
    ("Jacksonville FL", "https://www.zillow.com/homes/for_sale/jacksonville-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%2C%22min%22%3A40000%7D%7D%7D"),
    ("Melbourne FL", "https://www.zillow.com/homes/for_sale/melbourne-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%2C%22min%22%3A40000%7D%7D%7D"),
    ("Stuart FL", "https://www.zillow.com/homes/for_sale/stuart-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%2C%22min%22%3A40000%7D%7D%7D"),
]

# Listings accumulated across all cities.
all_results = []

with sync_playwright() as p:
    ctx = p.chromium.launch_persistent_context(
        user_data_dir=TEMP_PROFILE,
        executable_path=CHROME_PATH,
        headless=False,
        args=[
            "--profile-directory=Default",
            "--disable-blink-features=AutomationControlled",
            "--start-maximized",
            "--no-first-run",
            "--no-default-browser-check",
        ],
        viewport={"width": 1366, "height": 768},
    )
    # Close the context even if something outside the per-city handler
    # fails or the run is interrupted.
    try:
        page = ctx.new_page()

        for city, url in cities_fl:
            print(f"\n--- {city} ---")
            try:
                page.goto(url, wait_until="load", timeout=45000)
                human_delay(2, 3)
                slow_scroll(page, 4)
                human_delay(1, 2)

                html = page.content()
                title = page.title()
                print(f"Título: {title[:60]}")

                listings = parse_zillow_listings(html)
                print(f"Listings encontrados: {len(listings)}")
                # Preview only the first five listings on the console.
                for listing in listings[:5]:
                    print(f" ${listing['price']:,} | {listing.get('beds','?')}bd | {listing.get('address','?')[:50]}, {listing.get('city','?')}, {listing.get('state','?')}")
                all_results.extend(listings)

            except Exception as e:
                # Deliberate best-effort: a failure for one city is reported
                # and the loop continues with the next one.
                print(f"ERROR en {city}: {e}")
    finally:
        ctx.close()

print(f"\n=== TOTAL: {len(all_results)} listings en rango $40K-$230K ===")
with open("zillow_all.json", "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)
print("Guardado en zillow_all.json")
|
||||
Reference in New Issue
Block a user