feat: Agente-Marketing initial commit

This commit is contained in:
2026-07-03 12:23:34 -04:00
commit 293522436a
52 changed files with 13522 additions and 0 deletions
+155
View File
@@ -0,0 +1,155 @@
"""
Scraper Zillow: usa el search box como humano + extrae JSON de la página.
"""
import re, time, random, json
from playwright.sync_api import sync_playwright
# Path to the locally installed Chrome binary; passed as ``executable_path``
# when the persistent browser context is launched.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
# Directory passed as ``user_data_dir`` for the persistent browser context.
TEMP_PROFILE = r"C:\Temp\chrome_casa_hunter"
def hd(a=1.0, b=2.5):
    """Sleep for a random duration drawn uniformly between ``a`` and ``b`` seconds."""
    delay = random.uniform(a, b)
    time.sleep(delay)
def scroll(page, steps=4):
    """Scroll ``page`` down with the mouse wheel in ``steps`` randomized increments.

    Each step wheels down by a random 250-550 units and then pauses for a
    random 0.4-0.9 seconds.
    """
    for _step in range(steps):
        delta_y = random.randint(250, 550)
        page.mouse.wheel(0, delta_y)
        pause = random.uniform(0.4, 0.9)
        time.sleep(pause)
def parse_listings(html, min_p=40000, max_p=230000):
    """Extract listings within a price range from a Zillow search page.

    Looks for the ``__NEXT_DATA__`` script tag in ``html``, decodes its JSON
    payload and reads
    ``props.pageProps.searchPageState.cat1.searchResults.listResults``.

    Args:
        html: Full HTML source of the search results page.
        min_p: Inclusive lower price bound.
        max_p: Inclusive upper price bound.

    Returns:
        A list of dicts with the keys ``source``, ``address``, ``price``,
        ``beds``, ``baths``, ``sqft``, ``city``, ``state``, ``zip``,
        ``status``, ``url``, ``img`` and ``type``. Empty when the script tag
        is missing, its JSON is invalid, or the expected path is absent.
    """
    base_url = "https://www.zillow.com"
    match = re.search(
        r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL
    )
    if not match:
        return []

    try:
        data = json.loads(match.group(1))
    except json.JSONDecodeError as e:
        print(f" Parse error: {e}")
        return []

    # Walk the nested path defensively: any level may be missing or null.
    node = data
    for key in ("props", "pageProps", "searchPageState", "cat1",
                "searchResults", "listResults"):
        node = node.get(key) if isinstance(node, dict) else None
    list_results = node if isinstance(node, list) else []

    results = []
    for item in list_results:
        if not isinstance(item, dict):
            continue

        # Skip entries without a numeric price instead of letting one bad
        # record abort the whole page.
        price = item.get("unformattedPrice", 0)
        if not isinstance(price, (int, float)):
            continue
        if not (min_p <= price <= max_p):
            continue

        # detailUrl may be relative or already absolute; only prefix the
        # site root when it is relative.
        detail_url = item.get("detailUrl")
        if not isinstance(detail_url, str):
            detail_url = ""
        if not detail_url.startswith(("http://", "https://")):
            detail_url = base_url + detail_url

        hdp_data = item.get("hdpData")
        home_info = hdp_data.get("homeInfo") if isinstance(hdp_data, dict) else None
        home_type = home_info.get("homeType", "") if isinstance(home_info, dict) else ""

        results.append({
            "source": "zillow",
            "address": item.get("address", "?"),
            "price": price,
            "beds": item.get("beds", 0),
            "baths": item.get("baths", 0),
            "sqft": item.get("area", 0),
            "city": item.get("addressCity", ""),
            "state": item.get("addressState", ""),
            "zip": item.get("addressZipcode", ""),
            "status": item.get("statusType", ""),
            "url": detail_url,
            "img": item.get("imgSrc", ""),
            "type": home_type,
        })
    return results
def search_city(page, city_query, max_price=230000):
    """Search Zillow for a city and return the listings parsed from the page.

    Loads zillow.com, looks for the search input, clears it, types
    ``city_query`` one character at a time with random pauses and submits
    with Enter. If no search input is found, navigates directly to a
    for-sale URL built from the query with a price filter. The resulting
    page HTML is passed to ``parse_listings``.

    Args:
        page: Playwright page used to drive the browser.
        city_query: Free-text location, e.g. ``"Vero Beach, FL"``.
        max_price: Upper price bound, applied to the fallback URL filter and
            to the parsed results.

    Returns:
        The list of listing dicts from ``parse_listings``, or an empty list
        if any browser step raises.
    """
    print(f"\n--- Buscando: {city_query} ---")
    try:
        # Start from the home page.
        page.goto("https://www.zillow.com", wait_until="load", timeout=30000)
        hd(1.5, 2.5)

        # Locate the search box, trying the alternative selectors second.
        search_box = page.query_selector(
            "input[id*='search'], input[placeholder*='address'], input[placeholder*='city']"
        )
        if not search_box:
            search_box = page.query_selector(
                "#search-box-input, [data-testid='search-input'], input[name='searchQueryState']"
            )

        if search_box:
            search_box.click()
            hd(0.3, 0.6)
            # Select all existing text. Control is released in ``finally`` so
            # a failed key press cannot leave the modifier held down for the
            # next search on the same page.
            page.keyboard.down("Control")
            try:
                page.keyboard.press("a")
            finally:
                page.keyboard.up("Control")
            hd(0.2, 0.4)
            page.keyboard.press("Delete")
            hd(0.3, 0.5)
            # Type one character at a time with random pauses.
            for char in city_query:
                page.keyboard.type(char)
                time.sleep(random.uniform(0.07, 0.18))
            hd(1.0, 1.8)
            page.keyboard.press("Enter")
            # NOTE(review): this may return immediately if the page is still
            # in its previous "load" state; the pauses below give the results
            # time to render - confirm against real runs.
            page.wait_for_load_state("load", timeout=30000)
            hd(2, 3)
            scroll(page, 4)
            hd(1, 2)
        else:
            # No search box found: go straight to a filtered results URL.
            city_slug = city_query.lower().replace(" ", "-").replace(",", "")
            url = f"https://www.zillow.com/homes/for_sale/{city_slug}/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A{max_price}%2C%22min%22%3A40000%7D%7D%7D"
            page.goto(url, wait_until="load", timeout=45000)
            hd(2, 3)
            scroll(page, 4)

        title = page.title()
        html = page.content()
        print(f" Título: {title[:60]}")
        # Honor the caller's max_price when filtering, not only in the
        # fallback URL.
        listings = parse_listings(html, max_p=max_price)
        print(f" Encontrados: {len(listings)} en rango $40K-${max_price // 1000}K")
        for listing in listings[:3]:
            # str() keeps a null address from raising here, which would make
            # the except below discard listings that were already parsed.
            address = str(listing.get('address', '?'))[:50]
            print(f" ${listing['price']:,} | {listing.get('beds','?')}bd | {address}, {listing.get('city', '')}")
        return listings
    except Exception as e:
        # Best-effort boundary: one failing city must not stop the whole run.
        print(f" ERROR: {e}")
        return []
# Target cities, searched in this order.
CITIES = [
    "Vero Beach, FL",
    "Melbourne, FL",
    "Jacksonville, FL",
    "Stuart, FL",
    "Daytona Beach, FL",
    "St. Augustine, FL",
    "Palm Coast, FL",
    "New Smyrna Beach, FL",
]

# Listings accumulated across all cities, before deduplication.
all_results = []
with sync_playwright() as p:
    # Drive the installed Chrome with a persistent profile directory, in a
    # visible (non-headless) window.
    ctx = p.chromium.launch_persistent_context(
        user_data_dir=TEMP_PROFILE,
        executable_path=CHROME_PATH,
        headless=False,
        args=[
            "--profile-directory=Default",
            "--disable-blink-features=AutomationControlled",
            "--start-maximized",
            "--no-first-run",
            "--no-default-browser-check",
        ],
        viewport={"width": 1366, "height": 768},
    )
    try:
        page = ctx.new_page()
        for city in CITIES:
            listings = search_city(page, city)
            all_results.extend(listings)
            hd(2, 4)  # pause between cities
    finally:
        # Always close the browser context, even if the loop is interrupted.
        ctx.close()

# Deduplicate by normalized address. Listings whose address is missing (None,
# empty, or the "?" placeholder) are keyed by URL instead, so they neither
# crash the normalization nor all collapse into a single entry.
seen = set()
unique = []
for r in all_results:
    address = str(r.get("address") or "").lower().strip()
    key = address if address not in ("", "?") else r.get("url", "")
    if key not in seen:
        seen.add(key)
        unique.append(r)

print(f"\n=== TOTAL: {len(unique)} listings únicos en $40K-$230K ===")
with open("zillow_final.json", "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=2, ensure_ascii=False)
print("Guardado en zillow_final.json")