feat: Agente-Marketing initial commit

This commit is contained in:
2026-07-03 12:23:34 -04:00
commit 293522436a
52 changed files with 13522 additions and 0 deletions
+120
View File
@@ -0,0 +1,120 @@
"""
Scraper de Zillow usando el perfil real de Chrome.
Extrae datos completos del JSON embebido en la página.
"""
import re, time, random, json, os
from playwright.sync_api import sync_playwright
# Path to the Chrome binary; passed to Playwright as ``executable_path``.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
# Directory passed to Playwright as ``user_data_dir`` for the persistent context.
TEMP_PROFILE = r"C:\Temp\chrome_casa_hunter"
def human_delay(a=1.5, b=3.5):
    """Pause for a random interval to mimic human pacing.

    Sleeps for a duration drawn uniformly between ``a`` and ``b`` seconds.

    Args:
        a: One bound of the interval, in seconds.
        b: The other bound of the interval, in seconds.
    """
    # time.sleep() raises ValueError for a negative duration, so clamp at zero
    # instead of letting a bad bound abort the scraping loop.
    time.sleep(max(0.0, random.uniform(a, b)))
def slow_scroll(page, steps=4, *, min_delta=300, max_delta=600, pause=(0.5, 1.0)):
    """Scroll the page down in several randomly sized wheel movements.

    Each step sends one vertical mouse-wheel event and then sleeps for a random
    interval, so the scroll does not happen in a single jump.

    Args:
        page: Object exposing ``mouse.wheel(delta_x, delta_y)``; the script
            passes the page created from the Playwright browser context.
        steps: Number of wheel movements to perform.
        min_delta: Smallest vertical wheel distance per step (inclusive).
        max_delta: Largest vertical wheel distance per step (inclusive).
        pause: ``(low, high)`` bounds, in seconds, of the sleep after each step.
    """
    pause_low, pause_high = pause
    for _ in range(steps):
        page.mouse.wheel(0, random.randint(min_delta, max_delta))
        # Clamp so a negative pause bound cannot make time.sleep() raise.
        time.sleep(max(0.0, random.uniform(pause_low, pause_high)))
def _dig(data, *keys):
    """Follow ``keys`` through nested dicts; return {} once a level is not a dict."""
    for key in keys:
        if not isinstance(data, dict):
            return {}
        data = data.get(key)
    return data


def _listing_from_prop(prop, price):
    """Build the normalized listing record for one ``listResults`` entry."""
    detail_url = prop.get("detailUrl") or ""
    # Only prefix relative paths; an already absolute URL must not get the
    # site origin prepended a second time.
    if not detail_url.startswith(("http://", "https://")):
        detail_url = "https://www.zillow.com" + detail_url
    return {
        "source": "zillow",
        "address": prop.get("address", "?"),
        "price": price,
        "beds": prop.get("beds", 0),
        "baths": prop.get("baths", 0),
        "sqft": prop.get("area", 0),
        "city": prop.get("addressCity", ""),
        "state": prop.get("addressState", ""),
        "zip": prop.get("addressZipcode", ""),
        "status": prop.get("statusType", ""),
        "url": detail_url,
        "img": prop.get("imgSrc", ""),
        # hdpData / homeInfo may be missing or null; _dig tolerates both.
        "property_type": _dig(prop, "hdpData", "homeInfo", "homeType") or "",
    }


def parse_zillow_listings(html, max_price=230000, min_price=40000):
    """Extract listings from the JSON embedded in a Zillow search page.

    The primary source is the ``__NEXT_DATA__`` script tag, read at
    ``props.pageProps.searchPageState.cat1.searchResults.listResults``.
    If that yields nothing, a regex fallback scans the raw HTML for
    ``unformattedPrice`` / ``address`` pairs.

    Args:
        html: Full HTML of the search results page.
        max_price: Upper price bound (inclusive).
        min_price: Lower price bound (inclusive).

    Returns:
        A list of dicts. Records from ``__NEXT_DATA__`` carry the keys
        ``source``, ``address``, ``price``, ``beds``, ``baths``, ``sqft``,
        ``city``, ``state``, ``zip``, ``status``, ``url``, ``img`` and
        ``property_type``; fallback records carry only ``source``, ``price``
        and ``address``.
    """
    results = []

    match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
    if match:
        try:
            data = json.loads(match.group(1))
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(f"Error parsing __NEXT_DATA__: {e}")
            data = {}

        list_results = _dig(
            data, "props", "pageProps", "searchPageState", "cat1", "searchResults", "listResults"
        )
        if not isinstance(list_results, list):
            list_results = []

        for prop in list_results:
            if not isinstance(prop, dict):
                continue
            price = prop.get("unformattedPrice", 0)
            # A null or non-numeric price cannot be range-checked; skip that
            # one entry instead of aborting the whole page.
            if isinstance(price, bool) or not isinstance(price, (int, float)):
                continue
            if min_price <= price <= max_price:
                results.append(_listing_from_prop(prop, price))

    if not results:
        # Fallback: pull prices and addresses straight out of the raw HTML.
        # The n-th price is paired with the n-th address purely by position,
        # so the pairing is best-effort only.
        raw_prices = re.findall(r'"unformattedPrice":\s*(\d+)', html)
        raw_addrs = re.findall(r'"address":\s*"([^"]+)"', html)
        for raw_price, address in zip(raw_prices, raw_addrs):
            price = int(raw_price)
            if min_price <= price <= max_price:
                results.append({"source": "zillow", "price": price, "address": address})

    return results
# Florida markets to scan, as (label, search URL) pairs. Every URL carries the
# same URL-encoded searchQueryState: a price filter with max 230000 / min 40000.
cities_fl = [
    (
        f"{name} FL",
        "https://www.zillow.com/homes/for_sale/"
        + name.lower().replace(" ", "-")
        + "-fl/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%2C%22min%22%3A40000%7D%7D%7D",
    )
    for name in ("Vero Beach", "Jacksonville", "Melbourne", "Stuart")
]
# Script body: visit every search URL in cities_fl with a visible Chrome
# window, parse the listings from each page, and dump everything to JSON.
all_results = []

with sync_playwright() as p:
    ctx = p.chromium.launch_persistent_context(
        user_data_dir=TEMP_PROFILE,
        executable_path=CHROME_PATH,
        headless=False,
        args=[
            "--profile-directory=Default",
            "--disable-blink-features=AutomationControlled",
            "--start-maximized",
            "--no-first-run",
            "--no-default-browser-check",
        ],
        viewport={"width": 1366, "height": 768},
    )
    # try/finally so the browser context is closed even if page creation fails
    # or the run is interrupted outside the per-city handler.
    try:
        page = ctx.new_page()
        for city, url in cities_fl:
            print(f"\n--- {city} ---")
            # A failure in one city is reported and must not stop the others.
            try:
                page.goto(url, wait_until="load", timeout=45000)
                human_delay(2, 3)
                slow_scroll(page, 4)
                human_delay(1, 2)
                html = page.content()
                title = page.title()
                print(f"Título: {title[:60]}")
                listings = parse_zillow_listings(html)
                print(f"Listings encontrados: {len(listings)}")
                # Preview only the first five; fallback records lack some keys,
                # hence the .get() defaults.
                for listing in listings[:5]:
                    print(f" ${listing['price']:,} | {listing.get('beds','?')}bd | {listing.get('address','?')[:50]}, {listing.get('city','?')}, {listing.get('state','?')}")
                all_results.extend(listings)
            except Exception as e:
                print(f"ERROR en {city}: {e}")
    finally:
        ctx.close()

print(f"\n=== TOTAL: {len(all_results)} listings en rango $40K-$230K ===")
with open("zillow_all.json", "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)
print("Guardado en zillow_all.json")