# Agente-Marketing/casa-hunter/test_craigslist.py

import requests, re
from bs4 import BeautifulSoup

# Browser-like User-Agent header sent with the Craigslist requests below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
}

# Craigslist Florida markets for our target zones.
# Each entry is (display name, craigslist subdomain); the trailing comment
# lists the towns that market is meant to cover.
_MARKET_SUBDOMAINS = (
    ("Treasure Coast", "treasure"),       # Stuart, Vero Beach, Fort Pierce
    ("Space Coast", "spacecoast"),        # Melbourne, Titusville, Cocoa
    ("Daytona", "daytona"),               # Daytona, Ormond, NSB
    ("Jacksonville", "jacksonville"),     # Jacksonville, St. Augustine
    ("Flagler/Volusia", "volusia"),       # Palm Coast, Flagler Beach
)

# Display name -> site base URL, in the order the markets are queried.
MARKETS = {
    label: f"https://{subdomain}.craigslist.org"
    for label, subdomain in _MARKET_SUBDOMAINS
}

# Smoke-test the Craigslist RSS search for every market in MARKETS: print the
# number of <item> entries returned, a two-listing sample (title | price),
# and finally the total across all markets.
print("=== Craigslist Florida RSS Test ===\n")
total = 0
for name, base in MARKETS.items():
    url = f"{base}/search/rfs?format=rss&max_price=230000&min_price=40000"
    # Deliberately broad handler: this is a diagnostic script, so a failure in
    # one market (network, HTTP status, missing parser) is reported and the
    # loop moves on to the next market instead of aborting.
    try:
        r = requests.get(url, headers=headers, timeout=15)
        # A blocked or failed request (403, 5xx, ...) would otherwise parse to
        # zero <item> tags and be misreported as "0 listings".
        r.raise_for_status()

        # Use the XML parser only when the server says it sent XML; otherwise
        # fall back to the lenient built-in HTML parser.
        content_type = r.headers.get("content-type", "")
        parser = "xml" if "xml" in content_type else "html.parser"
        soup = BeautifulSoup(r.text, parser)

        items = soup.find_all("item")
        print(f"{name}: {len(items)} listings")
        for item in items[:2]:
            title = item.find("title")
            title_text = title.get_text() if title else "?"

            # Prefer a dedicated price element; otherwise look for a dollar
            # amount inside the title. "?" mirrors the missing-title fallback
            # rather than printing the literal "None".
            price_el = item.find("price") or item.find("ask")
            if price_el:
                price_text = price_el.get_text()
            else:
                price_match = re.search(r'\$[\d,]+', title_text)
                price_text = price_match.group() if price_match else "?"

            print(f"  {title_text[:70]} | {price_text}")
        total += len(items)
    except Exception as e:
        print(f"{name}: ERROR {e}")

print(f"\nTotal listings available: {total}")

# Test Zillow with session/cookie
# Fetches the homepage first so the session picks up cookies, then requests a
# Vero Beach search page and reports whether listing data appears in the HTML.
print("\n=== Zillow with session ===")
try:
    # The context manager closes the session (and its pooled connections)
    # even if one of the requests raises.
    with requests.Session() as session:
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        })
        # First get homepage to get cookies
        session.get("https://www.zillow.com", timeout=15)
        # Now search (searchQueryState is URL-encoded JSON with a 230000 max price)
        r = session.get(
            "https://www.zillow.com/homes/for_sale/Vero-Beach-FL/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%7D%7D%7D",
            timeout=20
        )
        print(f"Status: {r.status_code}, size: {len(r.text)}")
        has_data = "__NEXT_DATA__" in r.text
        has_listings = "listResults" in r.text or "zpid" in r.text
        print(f"Has __NEXT_DATA__: {has_data}, Has listing data: {has_listings}")
        if has_listings:
            raw_prices = re.findall(r'"unformattedPrice":\s*(\d+)', r.text)
            # Inclusive at both ends so a listing priced exactly at the 40000
            # floor is not dropped; each price is converted to int only once.
            fl_prices = [
                price for price in map(int, raw_prices)
                if 40000 <= price <= 230000
            ]
            print(f"Prices in range: {fl_prices[:5]}")
# Broad on purpose: this is a diagnostic probe, so any failure is reported
# rather than crashing the script.
except Exception as e:
    print(f"ERROR: {e}")