Files
Agente-Marketing/casa-hunter/test_craigslist.py
T

65 lines
2.9 KiB
Python

import requests, re
from bs4 import BeautifulSoup
# Browser-like User-Agent header passed to the Craigslist requests below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
}

# Craigslist Florida sites for our target zones, keyed by display name.
# Dict insertion order is the order in which markets are queried and reported.
MARKETS = {
    # Stuart, Vero Beach, Fort Pierce
    "Treasure Coast": "https://treasure.craigslist.org",
    # Melbourne, Titusville, Cocoa
    "Space Coast": "https://spacecoast.craigslist.org",
    # Daytona, Ormond, NSB
    "Daytona": "https://daytona.craigslist.org",
    # Jacksonville, St. Augustine
    "Jacksonville": "https://jacksonville.craigslist.org",
    # Palm Coast, Flagler Beach
    "Flagler/Volusia": "https://volusia.craigslist.org",
}
# --- Craigslist RSS probe ---------------------------------------------------
# For each market, fetch the RSS search feed filtered to the 40,000-230,000
# price range, report how many <item> entries came back, and preview the
# first two listings (title truncated to 70 chars, plus a price).
print("=== Craigslist Florida RSS Test ===\n")
total = 0
# Fallback for feeds without a dedicated price element: grab the first
# "$123,456"-style amount from the title. Compiled once, outside the loop.
_PRICE_RE = re.compile(r'\$[\d,]+')
for name, base in MARKETS.items():
    url = f"{base}/search/rfs?format=rss&max_price=230000&min_price=40000"
    try:
        r = requests.get(url, headers=headers, timeout=15)
        # Surface 4xx/5xx responses as errors; otherwise a block/error page
        # would parse to zero <item> tags and be reported as "0 listings".
        r.raise_for_status()
        # Use the XML parser only when the server says the body is XML;
        # anything else is parsed leniently as HTML.
        content_type = r.headers.get("content-type", "")
        parser = "xml" if "xml" in content_type else "html.parser"
        soup = BeautifulSoup(r.text, parser)
        items = soup.find_all("item")
        print(f"{name}: {len(items)} listings")
        for item in items[:2]:
            title = item.find("title")
            title_text = title.get_text() if title else "?"
            price_el = item.find("price") or item.find("ask")
            if price_el is not None:
                price_text = price_el.get_text()
            else:
                # No price element: fall back to the title, and use the same
                # "?" placeholder as the title when nothing matches.
                match = _PRICE_RE.search(title_text)
                price_text = match.group() if match else "?"
            print(f" {title_text[:70]} | {price_text}")
        total += len(items)
    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic, so a failure
        # in one market (network, HTTP status, missing parser) is reported
        # and the loop moves on to the next market.
        print(f"{name}: ERROR {e}")
print(f"\nTotal listings available: {total}")
# --- Zillow probe with session/cookies ---------------------------------------
# Loads the Zillow homepage first so the session picks up any cookies, then
# requests a Vero Beach search capped at 230,000 and reports whether the
# response appears to contain embedded listing data.
print("\n=== Zillow with session ===")
try:
    # The context manager closes the session (and its pooled connections)
    # even if one of the requests raises.
    with requests.Session() as session:
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        })
        # First get homepage to get cookies
        session.get("https://www.zillow.com", timeout=15)
        # Now search; the body is fully read here, so `r` stays usable after
        # the session is closed.
        r = session.get(
            "https://www.zillow.com/homes/for_sale/Vero-Beach-FL/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%7D%7D%7D",
            timeout=20
        )
    # The status is printed rather than raised on: seeing a non-200 code is
    # part of what this probe reports.
    print(f"Status: {r.status_code}, size: {len(r.text)}")
    has_data = "__NEXT_DATA__" in r.text
    has_listings = "listResults" in r.text or "zpid" in r.text
    print(f"Has __NEXT_DATA__: {has_data}, Has listing data: {has_listings}")
    if has_listings:
        prices = re.findall(r'"unformattedPrice":\s*(\d+)', r.text)
        # NOTE(review): the lower bound is exclusive here, while the Craigslist
        # query uses min_price=40000 - confirm which is intended.
        fl_prices = [price for price in map(int, prices) if 40000 < price <= 230000]
        print(f"Prices in range: {fl_prices[:5]}")
except Exception as e:
    # Broad on purpose: top-level boundary of a best-effort diagnostic script.
    print(f"ERROR: {e}")