65 lines
2.9 KiB
Python
65 lines
2.9 KiB
Python
import requests, re
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Browser-like User-Agent passed as `headers=` to the Craigslist requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
}

# Craigslist Florida markets for our target zones, as (label, subdomain)
# pairs. The trailing comments name the cities each market is meant to cover.
_MARKET_SUBDOMAINS = (
    ("Treasure Coast", "treasure"),       # Stuart, Vero Beach, Fort Pierce
    ("Space Coast", "spacecoast"),        # Melbourne, Titusville, Cocoa
    ("Daytona", "daytona"),               # Daytona, Ormond, NSB
    ("Jacksonville", "jacksonville"),     # Jacksonville, St. Augustine
    # NOTE(review): confirm that volusia.craigslist.org is a live subdomain.
    ("Flagler/Volusia", "volusia"),       # Palm Coast, Flagler Beach
)

# Market label -> base site URL, in the same order as the table above.
MARKETS = {
    label: f"https://{subdomain}.craigslist.org"
    for label, subdomain in _MARKET_SUBDOMAINS
}
|
|
|
|
# Craigslist smoke test: for every market in MARKETS, request the
# `/search/rfs` RSS feed filtered to the 40,000-230,000 price window, print
# how many <item> entries came back plus a two-listing sample, and keep a
# running total across markets. A failure in one market is reported and does
# not stop the remaining markets from being checked.
print("=== Craigslist Florida RSS Test ===\n")

total = 0
for name, base in MARKETS.items():
    url = f"{base}/search/rfs?format=rss&max_price=230000&min_price=40000"
    try:
        r = requests.get(url, headers=headers, timeout=15)
        # Without this, a 403/404 error page parses to zero <item> tags and
        # would be reported as "0 listings" instead of as an error.
        r.raise_for_status()

        # Pick the XML parser only when the server says the body is XML.
        # Lower-case the header value first so "XML" also matches.
        content_type = r.headers.get("content-type", "").lower()
        parser = "xml" if "xml" in content_type else "html.parser"
        soup = BeautifulSoup(r.text, parser)

        items = soup.find_all("item")
        print(f"{name}: {len(items)} listings")

        # Show only the first two listings as a sample.
        for item in items[:2]:
            title = item.find("title")
            title_text = title.get_text() if title else "?"

            # Prefer a dedicated price element; otherwise fall back to the
            # first "$12,345"-style amount in the title, and to "?" when
            # neither exists (previously this printed the literal "None").
            price_el = item.find("price") or item.find("ask")
            if price_el:
                price_text = price_el.get_text()
            else:
                price_match = re.search(r'\$[\d,]+', title_text)
                price_text = price_match.group() if price_match else "?"

            print(f" {title_text[:70]} | {price_text}")

        total += len(items)
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, so any network,
        # HTTP, or parsing failure is reported for this market only.
        print(f"{name}: ERROR {e}")

print(f"\nTotal listings available: {total}")
|
|
|
|
# Test Zillow with session/cookie.
# Loads the Zillow homepage through a shared session (so any cookies it sets
# are sent on the next request), then requests a Vero Beach for-sale search
# and reports whether the response appears to contain embedded listing data.
print("\n=== Zillow with session ===")
try:
    # The context manager closes the session (and its pooled connections)
    # even when one of the requests raises.
    with requests.Session() as session:
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        })

        # First get homepage to get cookies; the response body is not used.
        session.get("https://www.zillow.com", timeout=15)

        # Now search. The URL-encoded searchQueryState decodes to
        # {"filterState":{"price":{"max":230000}}}.
        r = session.get(
            "https://www.zillow.com/homes/for_sale/Vero-Beach-FL/?searchQueryState=%7B%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A230000%7D%7D%7D",
            timeout=20,
        )

        # The status is printed rather than raised on: a non-200 answer is
        # itself a useful result for this probe.
        print(f"Status: {r.status_code}, size: {len(r.text)}")

        has_data = "__NEXT_DATA__" in r.text
        has_listings = "listResults" in r.text or "zpid" in r.text
        print(f"Has __NEXT_DATA__: {has_data}, Has listing data: {has_listings}")

        if has_listings:
            prices = re.findall(r'"unformattedPrice":\s*(\d+)', r.text)
            # Both bounds are inclusive, matching the min_price=40000 /
            # max_price=230000 window used for the Craigslist query; the
            # previous `40000 < p` dropped listings priced at exactly 40000.
            fl_prices = [p for p in map(int, prices) if 40000 <= p <= 230000]
            print(f"Prices in range: {fl_prices[:5]}")
except Exception as e:
    # Deliberately broad: best-effort probe, any failure is just reported.
    print(f"ERROR: {e}")
|