feat: AR-House initial commit

This commit is contained in:
2026-07-03 12:24:58 -04:00
commit 047c05287a
216 changed files with 127552 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
"""Sub-agentes de investigación de ubicación."""
@@ -0,0 +1,158 @@
"""Sub-agente: Amenities y walkability.
Fuente: Overpass API (OpenStreetMap) — gratuita, sin key requerida.
"""
from __future__ import annotations
import math
import time
import requests
from data_fetchers.base import USER_AGENT
# Overpass API endpoint that receives the amenity query built in this module.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Report category -> OSM tag values that are counted towards that category.
# Every value listed here is joined into the regex used by the Overpass query.
# NOTE(review): several of these values (e.g. "supermarket", "park",
# "fitness_centre") are normally mapped under shop=/leisure= rather than
# amenity= in OSM -- confirm the query key matches.
CATEGORIES = {
    "supermarket": ["supermarket", "grocery"],
    "hospital": ["hospital", "clinic", "doctors", "pharmacy"],
    "restaurant": ["restaurant", "fast_food", "cafe"],
    "park": ["park"],
    "gym": ["fitness_centre", "sports_centre"],
    "school": ["school", "kindergarten"],
    "bank": ["bank", "atm"],
    "gas_station": ["fuel"],
}
def run(lat: float, lon: float, address: str) -> dict:
    """Collect nearby amenities and a walkability estimate for one location.

    The lookup is fail-soft: any exception raised while querying or scoring is
    recorded under ``errors`` and the default (empty) values are returned.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Accepted for a uniform sub-agent signature; not used here.

    Returns:
        Dict with ``categories``, ``nearest``, ``walk_score_estimate``,
        ``total_amenities``, ``sources`` and ``errors``.
    """
    report: dict = {
        "categories": {},
        "nearest": {},
        "walk_score_estimate": None,
        "total_amenities": 0,
        "sources": ["OpenStreetMap/Overpass"],
        "errors": [],
    }
    # Report key -> key of the dict returned by the Overpass helper.
    field_map = (
        ("categories", "by_category"),
        ("nearest", "nearest"),
        ("total_amenities", "total"),
    )
    try:
        found = _overpass_amenities(lat, lon)
        for report_key, source_key in field_map:
            report[report_key] = found[source_key]
        # Estimated walk score, derived from the amenities just collected.
        report["walk_score_estimate"] = _estimate_walk_score(found)
    except Exception as exc:
        report["errors"].append(f"Overpass amenities: {exc}")
    return report
def _overpass_amenities(lat: float, lon: float, radius_m: int = 3200) -> dict:
    """Query the Overpass API for amenities within ``radius_m`` metres (~2 miles).

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Search radius in metres.

    Returns:
        Dict with:
        ``by_category`` -- category -> up to 5 closest ``{"name", "dist_miles"}``
        entries, sorted by distance;
        ``nearest`` -- category -> closest entry (only categories with a hit);
        ``total`` -- number of matching elements found, counted *before* the
        per-category lists are cut to 5.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    # OSM does not file everything under "amenity": supermarkets are
    # shop=supermarket, parks and gyms are leisure=*. Look at all three keys.
    osm_keys = ("amenity", "shop", "leisure")
    value_to_category = {
        value: cat for cat, values in CATEGORIES.items() for value in values
    }
    # Anchored so that "school" does not also pull in e.g. "driving_school".
    value_regex = "^(" + "|".join(value_to_category) + ")$"
    statements = "\n".join(
        f'{kind}["{key}"~"{value_regex}"](around:{radius_m},{lat},{lon});'
        for key in osm_keys
        for kind in ("node", "way")
    )
    # "center" makes Overpass attach a representative coordinate to ways.
    query = f"[out:json][timeout:30];\n(\n{statements}\n);\nout body center;\n"

    time.sleep(1)  # courtesy delay before hitting the public Overpass instance
    r = requests.post(OVERPASS_URL, data={"data": query},
                      headers={"User-Agent": USER_AGENT}, timeout=35)
    r.raise_for_status()
    elements = r.json().get("elements", [])

    by_category: dict = {cat: [] for cat in CATEGORIES}
    for el in elements:
        tags = el.get("tags", {})
        kind = next(
            (tags[key] for key in osm_keys if tags.get(key) in value_to_category),
            None,
        )
        if kind is None:
            continue
        # Nodes carry lat/lon directly; ways carry them under "center".
        point = el.get("center") or el
        if "lat" not in point or "lon" not in point:
            # No usable position: skip instead of pretending it is at distance 0.
            continue
        dist = round(_haversine(lat, lon, point["lat"], point["lon"]), 2)
        by_category[value_to_category[kind]].append(
            {"name": tags.get("name", kind), "dist_miles": dist}
        )

    # Count before truncating, otherwise the total can never exceed 5 per category.
    total = sum(len(items) for items in by_category.values())

    nearest: dict = {}
    for cat, items in by_category.items():
        items.sort(key=lambda entry: entry["dist_miles"])
        if items:
            nearest[cat] = dict(items[0])
        by_category[cat] = items[:5]

    return {"by_category": by_category, "nearest": nearest, "total": total}
def _haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Distancia en millas entre dos coordenadas."""
R = 3958.8 # radio Tierra en millas
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
return R * 2 * math.asin(math.sqrt(a))
def _estimate_walk_score(amenities: dict) -> int:
"""Estima walk score 0-100 basado en densidad y diversidad de amenities."""
cats = amenities["by_category"]
nearest = amenities["nearest"]
score = 0
# Puntos por cercanía de supermercado (más importante)
sup = nearest.get("supermarket", {}).get("dist_miles", 99)
if sup <= 0.25:
score += 25
elif sup <= 0.5:
score += 18
elif sup <= 1.0:
score += 10
elif sup <= 2.0:
score += 5
# Restaurantes/cafes cercanos
rest_count = len([x for x in cats.get("restaurant", []) if x["dist_miles"] <= 1.0])
score += min(20, rest_count * 3)
# Diversidad de categorías con algo en <= 1 milla
cats_nearby = sum(
1 for cat, items in cats.items()
if any(x["dist_miles"] <= 1.0 for x in items)
)
score += cats_nearby * 5
# Hospitales
hosp = nearest.get("hospital", {}).get("dist_miles", 99)
if hosp <= 2.0:
score += 10
return min(100, max(0, score))
def score(data: dict) -> int:
    """Return the 0-100 amenities score for a ``run`` result.

    The walk-score estimate is used when present; otherwise the score is
    derived from the raw amenity count.
    """
    estimate = data.get("walk_score_estimate")
    if estimate is not None:
        return estimate

    count = data.get("total_amenities", 0)
    # (minimum count, score) pairs, checked from the highest tier down.
    for minimum, tier_score in ((50, 85), (30, 70), (15, 55), (5, 40)):
        if count >= minimum:
            return tier_score
    return 25
+124
View File
@@ -0,0 +1,124 @@
"""Sub-agente: Criminalidad.
Fuentes: SpotCrime (scraping) + FBI UCR API (key opcional).
Retorna datos fail-soft — si falla, devuelve dict vacío con error.
"""
from __future__ import annotations
import os
import time
import requests
from bs4 import BeautifulSoup
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
# Optional FBI API key, read from the environment at import time.
# The FBI lookup in run() is skipped when this is empty.
FBI_API_KEY = os.getenv("FBI_UCR_API_KEY", "")
# Base URL that _fbi_ucr() extends with the state-level summary path.
FBI_BASE = "https://api.usa.gov/crime/fbi/cde"
def run(lat: float, lon: float, address: str) -> dict:
    """Collect crime data for a location (fail-soft).

    SpotCrime is always queried; the FBI API is queried only when
    ``FBI_API_KEY`` is set. Failures are recorded under ``errors`` instead of
    being raised.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Postal address, passed to the FBI lookup to derive the state.

    Returns:
        Dict with ``score_input``, ``crimes_recent``, ``crime_types``,
        ``trend``, ``sources``, ``errors`` and, when the FBI lookup succeeds,
        ``fbi_data``. ``score_input`` stays empty when SpotCrime could not be
        read, so ``score()`` falls back to its neutral value instead of
        treating "no data" as "no crime".
    """
    result = {
        "score_input": {},
        "crimes_recent": [],
        "crime_types": {},
        "trend": "desconocido",
        "sources": [],
        "errors": [],
    }

    # --- SpotCrime ---
    spot_ok = False
    try:
        spot = _spotcrime(lat, lon)
        result["crimes_recent"] = spot.get("crimes", [])
        result["crime_types"] = spot.get("by_type", {})
        result["sources"].append("SpotCrime.com")
        spot_ok = True
    except Exception as e:
        result["errors"].append(f"SpotCrime: {e}")

    # --- FBI API (only when a key is configured) ---
    if FBI_API_KEY:
        try:
            fbi = _fbi_ucr(address)
            result["fbi_data"] = fbi
            result["sources"].append("FBI UCR API")
        except Exception as e:
            result["errors"].append(f"FBI UCR: {e}")

    if spot_ok:
        by_type = result["crime_types"]
        recent = result["crimes_recent"]
        # crimes_recent is truncated by the fetcher, so prefer the per-type
        # counts, which cover every crime that was returned.
        total = sum(by_type.values()) if by_type else len(recent)
        # NOTE(review): the key name assumes SpotCrime returns roughly the
        # last 30 days -- confirm against the feed.
        seen_types = {str(kind).lower() for kind in by_type}
        seen_types.update(
            str(crime.get("type") or "").lower()
            for crime in recent
            if isinstance(crime, dict)
        )
        violent = {"assault", "robbery", "shooting", "homicide"}
        result["score_input"] = {
            "total_crimes_30d": total,
            "has_violent": bool(seen_types & violent),
        }
    return result
def _spotcrime(lat: float, lon: float) -> dict:
    """Fetch crimes around a point from SpotCrime's JSON(P) endpoint.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        ``{"crimes": [...], "by_type": {...}}`` where ``crimes`` holds at most
        the first 50 records and ``by_type`` counts every returned record by
        its ``type`` (records without a type are counted as ``"Other"``).

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
        ValueError: if the body is not valid JSON or not a JSON object.
    """
    import json
    from collections import Counter

    url = f"https://spotcrime.com/crimes.json?lat={lat}&lon={lon}&callback=spotcrime"
    headers = {"User-Agent": USER_AGENT, "Referer": "https://spotcrime.com"}
    time.sleep(2)  # courtesy delay before the request
    r = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()

    # The endpoint answers with JSONP: spotcrime({...}), possibly followed by
    # ";" or whitespace. Keep what lies between the first "(" and the last ")".
    text = r.text.strip()
    if text.startswith("spotcrime("):
        start = text.index("(") + 1
        end = text.rfind(")")
        text = text[start:end] if end >= start else text[start:]

    data = json.loads(text)
    if not isinstance(data, dict):
        raise ValueError("unexpected SpotCrime payload (expected a JSON object)")

    crimes = data.get("crimes", [])
    by_type = dict(
        Counter(
            (crime.get("type") or "Other")
            for crime in crimes
            if isinstance(crime, dict)
        )
    )
    return {"crimes": crimes[:50], "by_type": by_type}
def _fbi_ucr(address: str) -> dict:
    """Fetch state-level summarized crime statistics from the FBI API.

    The two-letter state is taken from the ``", ST 12345"`` part of the
    address. When no such pattern is found the lookup falls back to ``"FL"``.
    NOTE(review): that fallback returns Florida figures for addresses that
    cannot be parsed -- confirm this is intended.

    Args:
        address: Postal address containing ``", <STATE> <ZIP>"``.

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
            The message has the API key masked.
    """
    import re

    m = re.search(r",\s*([A-Z]{2})\s+\d{5}", address.upper())
    state = m.group(1) if m else "FL"
    url = f"{FBI_BASE}/summarized/state/{state}/all?API_KEY={FBI_API_KEY}&from=2020&to=2023"
    try:
        r = requests.get(url, timeout=DEFAULT_TIMEOUT, headers={"User-Agent": USER_AGENT})
        r.raise_for_status()
    except requests.RequestException as exc:
        # requests puts the full URL, query string included, into its error
        # messages, and callers copy those messages into the report. Mask the
        # key so it never leaves this function.
        message = str(exc)
        if FBI_API_KEY:
            message = message.replace(FBI_API_KEY, "***")
        raise type(exc)(message) from None
    return r.json()
def score(data: dict) -> int:
    """Return a 0-100 safety score (100 = very safe) for a ``run`` result.

    Returns the neutral value 50 when ``data`` or its ``score_input`` is empty.
    """
    inputs = data.get("score_input") if data else None
    if not inputs:
        return 50  # neutral: nothing to score

    crimes = inputs.get("total_crimes_30d", 0)
    violent = inputs.get("has_violent", False)

    # Base score falls as the crime count rises.
    if crimes == 0:
        safety = 90
    else:
        safety = 15
        for ceiling, tier in ((5, 75), (15, 60), (30, 45), (50, 30)):
            if crimes <= ceiling:
                safety = tier
                break

    # Violent crime costs a further 15 points, never going below 0.
    if violent:
        safety = max(0, safety - 15)
    return min(100, max(0, safety))
@@ -0,0 +1,205 @@
"""Sub-agente: Demografía.
Fuente: Census ACS API (key gratuita en api.census.gov/data/key_signup.html).
Sin key, usa datos estimados por ZIP desde datos públicos del Census.
"""
from __future__ import annotations
import os
import re
import time
import requests
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
# Census API key, read from the environment at import time; run() only calls
# the ACS endpoint with this key when it is non-empty.
CENSUS_KEY = os.getenv("CENSUS_API_KEY", "")
# Endpoint queried by both the keyed tract lookup and the key-less ZIP lookup.
ACS_BASE = "https://api.census.gov/data/2022/acs/acs5"
# ACS5 variables to request: Census variable code -> field name used in this module.
ACS_VARS = {
    "B19013_001E": "median_household_income",
    "B01002_001E": "median_age",
    "B23025_005E": "unemployed",
    "B23025_002E": "labor_force",
    "B15003_022E": "bachelors_degree",
    "B15003_001E": "population_25plus",
    "B03002_003E": "white_non_hispanic",
    "B03002_004E": "black",
    "B03002_012E": "hispanic",
    "B03002_006E": "asian",
    "B01003_001E": "total_population",
}
def run(lat: float, lon: float, address: str, tract_geoid: str = "",
        state_fips: str = "", county_fips: str = "") -> dict:
    """Collect demographic indicators for a location (fail-soft).

    The keyed ACS lookup is used when ``CENSUS_API_KEY`` and both FIPS codes
    are available. If it yields nothing, a key-less ZIP-level lookup is tried
    using the ZIP code found in ``address``.

    Args:
        lat: Latitude (not used by this sub-agent).
        lon: Longitude (not used by this sub-agent).
        address: Postal address; the last 5-digit group is taken as the ZIP.
        tract_geoid: Census tract GEOID, if known.
        state_fips: State FIPS code, if known.
        county_fips: County FIPS code, if known.

    Returns:
        Dict with the demographic fields plus ``sources`` and ``errors``.
    """
    result = {
        "median_household_income": None,
        "median_age": None,
        "unemployment_rate": None,
        "education_bachelors_pct": None,
        "ethnicity": {},
        "total_population": None,
        "sources": [],
        "errors": [],
    }

    # ZIP for the fallback. Use the LAST 5-digit group: the first one is often
    # the house number ("10250 SW 5th St, Miami, FL 33174").
    zip_candidates = re.findall(r"\b(\d{5})\b", address)
    zip_code = zip_candidates[-1] if zip_candidates else ""

    # --- Census ACS API ---
    if not CENSUS_KEY:
        result["errors"].append(
            "Census API key no configurada. Agregar CENSUS_API_KEY en .env. "
            "Obtener en: https://api.census.gov/data/key_signup.html"
        )
    elif not (state_fips and county_fips):
        # The key is present; the geography is what is missing.
        result["errors"].append(
            "Census ACS omitido: faltan state_fips/county_fips para la consulta."
        )
    else:
        try:
            acs = _census_acs(state_fips, county_fips, tract_geoid)
            if acs:
                result.update(acs)
                result["sources"].append("Census ACS 5-Year")
            else:
                # Do not credit a source that returned nothing; this also
                # lets the ZIP fallback below run.
                result["errors"].append("Census ACS: sin datos para la geografía solicitada")
        except Exception as e:
            result["errors"].append(f"Census ACS: {e}")

    # --- Fallback: ZIP-level lookup without a key ---
    if not result["sources"] and zip_code:
        try:
            z = _census_zip_no_key(zip_code)
            if z:
                result.update(z)
                result["sources"].append("Census ZIP (estimado)")
            else:
                result["errors"].append(f"Census ZIP fallback: sin datos para {zip_code}")
        except Exception as e:
            result["errors"].append(f"Census ZIP fallback: {e}")
    return result
def _census_acs(state_fips: str, county_fips: str, tract_geoid: str) -> dict:
    """Query the Census ACS 5-Year API for a tract, or for its county.

    When ``tract_geoid`` has at least 6 characters its last 6 are used as the
    tract code. Otherwise the query is made at county level, rather than
    picking an arbitrary tract of that county.

    Args:
        state_fips: State FIPS code (first two characters are used).
        county_fips: County FIPS code (last three characters are used).
        tract_geoid: Tract GEOID (11 digits: SS + CCC + TTTTTT) or "".

    Returns:
        The dict produced by ``_process_acs``, or ``{}`` when the API returns
        no data row.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """

    def _num(raw):
        # Parse one API cell. Blanks, NaN and the negative placeholder values
        # the Census API uses for "no estimate" (e.g. -666666666) become None.
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        if value != value or value < 0:
            return None
        # Keep fractional values such as a median age of 38.5.
        return int(value) if value.is_integer() else value

    state = state_fips[:2].zfill(2)
    county = county_fips[-3:].zfill(3)
    if len(tract_geoid) >= 6:
        geography = {
            "for": f"tract:{tract_geoid[-6:]}",
            "in": f"state:{state} county:{county}",
        }
    else:
        geography = {"for": f"county:{county}", "in": f"state:{state}"}

    params = {"get": ",".join(ACS_VARS), **geography, "key": CENSUS_KEY}
    headers = {"User-Agent": USER_AGENT}
    time.sleep(0.5)  # courtesy delay before the request
    r = requests.get(ACS_BASE, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()

    # The API answers with a table: first row is the header, then data rows.
    rows = r.json()
    if len(rows) < 2:
        return {}
    header, vals = rows[0], rows[1]
    data = {ACS_VARS[h]: _num(v) for h, v in zip(header, vals) if h in ACS_VARS}
    return _process_acs(data)
def _census_zip_no_key(zip_code: str) -> dict:
    """Query ZIP-level (ZCTA) ACS figures without an API key.

    Args:
        zip_code: 5-digit ZIP code.

    Returns:
        Dict with ``median_household_income``, ``median_age`` and
        ``total_population`` (each ``None`` when unavailable), or ``{}`` when
        the API returns no data row.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """

    def _num(raw):
        # Parse one API cell. Blanks, NaN and the negative placeholder values
        # the Census API uses for "no estimate" (e.g. -666666666) become None.
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        if value != value or value < 0:
            return None
        # Keep fractional values such as a median age of 38.5.
        return int(value) if value.is_integer() else value

    vars_str = "B19013_001E,B01002_001E,B01003_001E"
    params = {"get": vars_str, "for": f"zip code tabulation area:{zip_code}"}
    r = requests.get(ACS_BASE, params=params,
                     headers={"User-Agent": USER_AGENT}, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()

    # The API answers with a table: first row is the header, then data rows.
    rows = r.json()
    if len(rows) < 2:
        return {}
    raw = dict(zip(rows[0], rows[1]))
    return {
        "median_household_income": _num(raw.get("B19013_001E")),
        "median_age": _num(raw.get("B01002_001E")),
        "total_population": _num(raw.get("B01003_001E")),
    }
def _process_acs(data: dict) -> dict:
total_pop = data.get("total_population") or 1
labor = data.get("labor_force") or 1
pop25 = data.get("population_25plus") or 1
unemployed = data.get("unemployed") or 0
bachelors = data.get("bachelors_degree") or 0
white = data.get("white_non_hispanic") or 0
black = data.get("black") or 0
hispanic = data.get("hispanic") or 0
asian = data.get("asian") or 0
ethnicity = {
"white_non_hispanic_pct": round(white / total_pop * 100, 1),
"black_pct": round(black / total_pop * 100, 1),
"hispanic_pct": round(hispanic / total_pop * 100, 1),
"asian_pct": round(asian / total_pop * 100, 1),
"other_pct": max(0, round((total_pop - white - black - hispanic - asian) / total_pop * 100, 1)),
}
return {
"median_household_income": data.get("median_household_income"),
"median_age": data.get("median_age"),
"unemployment_rate": round(unemployed / labor * 100, 1) if labor > 0 else None,
"education_bachelors_pct": round(bachelors / pop25 * 100, 1) if pop25 > 0 else None,
"ethnicity": ethnicity,
"total_population": total_pop,
}
def _safe_int(v) -> int | None:
try:
return int(v)
except (TypeError, ValueError):
return None
def score(data: dict) -> int:
    """Return a 0-100 socio-economic score for a ``run`` result.

    Starts at 50 and adds or subtracts points for median household income,
    unemployment rate and share of bachelor's degrees. Indicators that are
    ``None`` or absent contribute nothing.
    """

    def _adjustment(value, ladder, fallback, higher_is_better):
        # Walk the (bound, points) ladder and return the first tier reached.
        if value is None:
            return 0
        for bound, points in ladder:
            reached = value >= bound if higher_is_better else value <= bound
            if reached:
                return points
        return fallback

    total = 50
    total += _adjustment(
        data.get("median_household_income"),
        ((100000, 20), (75000, 12), (50000, 5), (35000, -5)),
        -15,
        True,
    )
    total += _adjustment(
        data.get("unemployment_rate"),
        ((3, 15), (5, 8), (8, 0), (12, -8)),
        -15,
        False,
    )
    total += _adjustment(
        data.get("education_bachelors_pct"),
        ((50, 10), (35, 5), (20, 0)),
        -5,
        True,
    )
    return min(100, max(0, total))
@@ -0,0 +1,160 @@
"""Sub-agente: Estilo de vida náutico.
Fuentes: Overpass API (marinas, boat ramps, playas, acceso al agua).
"""
from __future__ import annotations
import math
import time
import requests
from data_fetchers.base import USER_AGENT
# Overpass API endpoint that receives the nautical query built in this module.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
def run(lat: float, lon: float, address: str) -> dict:
    """Collect nautical-lifestyle data (marinas, ramps, beaches, water access).

    Fail-soft: if the Overpass lookup raises, the error is recorded under
    ``errors`` and the empty defaults are returned.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Accepted for a uniform sub-agent signature; not used here.

    Returns:
        Dict with the nautical fields plus ``sources`` and ``errors``.
    """
    report = dict(
        marinas=[],
        boat_ramps=[],
        beaches=[],
        nearest_marina=None,
        nearest_beach=None,
        nearest_boat_ramp=None,
        ocean_access=False,
        waterway_nearby=False,
        sources=["OpenStreetMap/Overpass"],
        errors=[],
    )
    try:
        report.update(_overpass_nautical(lat, lon))
    except Exception as exc:
        report["errors"].append(f"Overpass lifestyle: {exc}")
    return report
def _overpass_nautical(lat: float, lon: float, radius_m: int = 16000) -> dict:
    """Query Overpass for nautical features around a point.

    Marinas, slipways and beaches are searched within ``radius_m`` metres
    (~10 miles by default), inland water within 3200 m and coastline within
    8000 m.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Radius in metres for marinas, slipways and beaches.

    Returns:
        Dict with ``marinas``, ``boat_ramps`` and ``beaches`` (each sorted by
        distance, at most 10 entries), ``nearest_marina``, ``nearest_beach``,
        ``nearest_boat_ramp`` (closest entry or ``None``), and the booleans
        ``ocean_access`` and ``waterway_nearby``.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    wide = f"(around:{radius_m},{lat},{lon});"
    close = f"(around:3200,{lat},{lon});"
    statements = [
        f'node["leisure"="marina"]{wide}',
        f'way["leisure"="marina"]{wide}',
        f'node["leisure"="slipway"]{wide}',
        f'way["leisure"="slipway"]{wide}',
        f'node["natural"="beach"]{wide}',
        f'way["natural"="beach"]{wide}',
        f'node["waterway"="river"]{close}',
        f'node["natural"~"^(water|bay)$"]{close}',
        # Rivers, canals and water bodies are drawn as ways/relations in OSM,
        # so node-only filters miss nearly all of them.
        f'way["waterway"~"^(river|canal)$"]{close}',
        f'way["natural"~"^(water|bay)$"]{close}',
        f'relation["natural"~"^(water|bay)$"]{close}',
        f'way["natural"="coastline"](around:8000,{lat},{lon});',
    ]
    # "center" makes Overpass attach a representative coordinate to ways/relations.
    query = (
        "[out:json][timeout:35];\n(\n"
        + "\n".join(statements)
        + "\n);\nout body center;\n"
    )

    time.sleep(1)  # courtesy delay before hitting the public Overpass instance
    r = requests.post(OVERPASS_URL, data={"data": query},
                      headers={"User-Agent": USER_AGENT}, timeout=40)
    r.raise_for_status()
    elements = r.json().get("elements", [])

    def _distance_miles(el):
        # Distance from the search centre, or None if the element has no position.
        point = el.get("center") or el
        if "lat" not in point or "lon" not in point:
            return None
        return round(_haversine(lat, lon, point["lat"], point["lon"]), 2)

    marinas, boat_ramps, beaches = [], [], []
    waterway_nearby = False
    ocean_access = False

    for el in elements:
        tags = el.get("tags", {})
        name = tags.get("name", "Sin nombre")
        leisure = tags.get("leisure", "")
        natural = tags.get("natural", "")
        waterway = tags.get("waterway", "")

        if leisure in ("marina", "slipway") or natural == "beach":
            dist = _distance_miles(el)
            if dist is None:
                # No position: skip instead of reporting it at distance 0.
                continue
            if leisure == "marina":
                marinas.append({
                    "name": name,
                    "dist_miles": dist,
                    "fuel": tags.get("fuel", "unknown"),
                    "pump_out": tags.get("pump_out", "unknown"),
                    "depth": tags.get("maxdraught", tags.get("depth", "unknown")),
                })
            elif leisure == "slipway":
                boat_ramps.append({"name": name, "dist_miles": dist})
            else:
                beaches.append({"name": name, "dist_miles": dist})
        elif natural == "coastline":
            ocean_access = True
        elif waterway in ("river", "canal") or natural in ("water", "bay"):
            waterway_nearby = True

    for entries in (marinas, boat_ramps, beaches):
        entries.sort(key=lambda entry: entry["dist_miles"])

    return {
        "marinas": marinas[:10],
        "boat_ramps": boat_ramps[:10],
        "beaches": beaches[:10],
        "nearest_marina": marinas[0] if marinas else None,
        "nearest_beach": beaches[0] if beaches else None,
        "nearest_boat_ramp": boat_ramps[0] if boat_ramps else None,
        "ocean_access": ocean_access,
        "waterway_nearby": waterway_nearby,
    }
def _haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
R = 3958.8
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
return R * 2 * math.asin(math.sqrt(a))
def score(data: dict) -> int:
    """Return a 0-100 nautical-lifestyle score for a ``run`` result.

    Starts at 30 and adds points for a close marina, a close beach, ocean
    access, a nearby waterway and the number of boat ramps.
    """

    def _proximity_bonus(entry, ladder):
        # First (max_miles, points) tier the entry's distance falls into.
        if not entry:
            return 0
        miles = entry["dist_miles"]
        return next((points for limit, points in ladder if miles <= limit), 0)

    total = 30  # base
    total += _proximity_bonus(
        data.get("nearest_marina"), ((1, 30), (3, 20), (5, 12), (10, 6))
    )
    total += _proximity_bonus(
        data.get("nearest_beach"), ((1, 20), (3, 12), (5, 6))
    )
    total += 10 if data.get("ocean_access") else 0
    total += 5 if data.get("waterway_nearby") else 0

    # Two points per boat ramp, at most 10.
    ramp_count = len(data.get("boat_ramps", []))
    total += min(10, ramp_count * 2)

    return min(100, max(0, total))
+151
View File
@@ -0,0 +1,151 @@
"""Sub-agente: Mercado laboral marítimo.
Fuentes: BLS.gov API (gratuita) + Overpass para instalaciones físicas.
"""
from __future__ import annotations
import time
import requests
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
# Overpass API endpoint used for the physical-facility lookup.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# BLS public time-series endpoint that _bls_maritime() posts to.
BLS_BASE = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
# Maritime NAICS codes -> industry label.
# NOTE(review): not referenced by the code visible in this module -- confirm
# whether it is still needed.
MARITIME_NAICS = {
    "483": "Water Transportation",
    "4883": "Support Activities for Water Transportation",
    "3366": "Ship & Boat Building",
    "114": "Fishing, Hunting and Trapping",
}
def run(lat: float, lon: float, address: str, state: str = "FL") -> dict:
    """Collect maritime job-market data for a location (fail-soft).

    Combines physical facilities found through Overpass with a BLS employment
    series. Each lookup records its own failure under ``errors``.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Accepted for a uniform sub-agent signature; not used here.
        state: State passed on to the BLS lookup.

    Returns:
        Dict with ``maritime_employers``, ``shipyards``, ``marinas_with_jobs``,
        ``bls_employment``, ``maritime_presence_score``, ``sources``, ``errors``.
    """
    report = {
        "maritime_employers": [],
        "shipyards": [],
        "marinas_with_jobs": [],
        "bls_employment": {},
        "maritime_presence_score": 0,
        "sources": [],
        "errors": [],
    }

    # Physical maritime facilities from Overpass.
    try:
        found = _overpass_maritime(lat, lon)
        for report_key, source_key in (
            ("shipyards", "shipyards"),
            ("marinas_with_jobs", "marinas"),
            ("maritime_employers", "employers"),
        ):
            report[report_key] = found.get(source_key, [])
        report["sources"].append("OpenStreetMap/Overpass")
    except Exception as exc:
        report["errors"].append(f"Overpass maritime: {exc}")

    # Employment series from BLS.
    try:
        report["bls_employment"] = _bls_maritime(state)
        report["sources"].append("BLS.gov")
    except Exception as exc:
        report["errors"].append(f"BLS: {exc}")

    # Overall presence: weighted count of each facility kind.
    weights = (("shipyards", 15), ("marinas_with_jobs", 8), ("maritime_employers", 5))
    report["maritime_presence_score"] = sum(
        len(report[key]) * weight for key, weight in weights
    )
    return report
def _overpass_maritime(lat: float, lon: float, radius_m: int = 16000) -> dict:
    """Query Overpass for maritime facilities within ``radius_m`` metres (~10 miles).

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Search radius in metres.

    Returns:
        ``{"shipyards": [...], "marinas": [...], "employers": [...]}`` with at
        most 10 ``{"name", "type"}`` entries per list. Ports and harbours are
        reported under ``employers``.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    around = f"(around:{radius_m},{lat},{lon});"
    filters = (
        '["industrial"="port"]',
        '["industrial"="shipyard"]',
        '["waterway"="boatyard"]',
        '["leisure"="marina"]',
        '["man_made"="shipyard"]',
        '["seamark:type"="harbour"]',
    )
    # These facilities are frequently mapped as areas, so ask for ways as well
    # as nodes.
    statements = "\n".join(
        f"{kind}{tag_filter}{around}"
        for tag_filter in filters
        for kind in ("node", "way")
    )
    query = f"[out:json][timeout:30];\n(\n{statements}\n);\nout body center;\n"

    time.sleep(1)  # courtesy delay before hitting the public Overpass instance
    r = requests.post(OVERPASS_URL, data={"data": query},
                      headers={"User-Agent": USER_AGENT}, timeout=35)
    r.raise_for_status()
    elements = r.json().get("elements", [])

    shipyards, marinas, employers = [], [], []
    for el in elements:
        tags = el.get("tags", {})
        name = tags.get("name", "Sin nombre")
        industrial = tags.get("industrial", "")

        if (
            tags.get("man_made", "") == "shipyard"
            or tags.get("waterway", "") == "boatyard"
            or industrial == "shipyard"
        ):
            shipyards.append({"name": name, "type": "shipyard"})
        elif tags.get("leisure", "") == "marina":
            marinas.append({"name": name, "type": "marina"})
        elif industrial == "port":
            employers.append({"name": name, "type": "port"})
        elif tags.get("seamark:type", "") == "harbour":
            # Harbours are requested by the query, so they must be classified
            # here or they are silently dropped.
            # NOTE(review): counted as employers -- confirm the weighting.
            employers.append({"name": name, "type": "harbour"})

    return {
        "shipyards": shipyards[:10],
        "marinas": marinas[:10],
        "employers": employers[:10],
    }
def _bls_maritime(state: str) -> dict:
    """Fetch the water-transportation employment series for a state from BLS.

    Uses the public BLS time-series endpoint (``BLS_BASE``) without a
    registration key.

    Args:
        state: Two-letter state abbreviation (case-insensitive) or a 2-digit
            state FIPS code. Unrecognised values fall back to Florida, the
            previous hard-coded behaviour.

    Returns:
        Dict with ``series_id``, ``latest_employment``, ``period``, ``year``
        and ``label``, or ``{}`` when the response carries no data points.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
        RuntimeError: when BLS reports that the request was not processed.
    """
    state_fips = {
        "AL": "01", "AK": "02", "AZ": "04", "AR": "05", "CA": "06", "CO": "08",
        "CT": "09", "DE": "10", "DC": "11", "FL": "12", "GA": "13", "HI": "15",
        "ID": "16", "IL": "17", "IN": "18", "IA": "19", "KS": "20", "KY": "21",
        "LA": "22", "ME": "23", "MD": "24", "MA": "25", "MI": "26", "MN": "27",
        "MS": "28", "MO": "29", "MT": "30", "NE": "31", "NV": "32", "NH": "33",
        "NJ": "34", "NM": "35", "NY": "36", "NC": "37", "ND": "38", "OH": "39",
        "OK": "40", "OR": "41", "PA": "42", "RI": "44", "SC": "45", "SD": "46",
        "TN": "47", "TX": "48", "UT": "49", "VT": "50", "VA": "51", "WA": "53",
        "WV": "54", "WI": "55", "WY": "56",
    }
    token = (state or "").strip().upper()
    if token.isdigit() and len(token) == 2:
        fips = token
    else:
        fips = state_fips.get(token, "12")

    # Series template taken from this module: SMU{state_fips}0000004830000001.
    # NOTE(review): this yields a 21-character ID, while BLS SMU series IDs
    # are documented as 20 characters -- verify the template against BLS.
    series_id = f"SMU{fips}0000004830000001"
    payload = {
        "seriesid": [series_id],
        "startyear": "2022",
        "endyear": "2024",
    }
    headers = {"User-Agent": USER_AGENT, "Content-Type": "application/json"}
    r = requests.post(BLS_BASE, json=payload,
                      headers=headers, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()
    data = r.json()

    # BLS answers HTTP 200 even for rejected requests; the outcome is in "status".
    status = data.get("status")
    if status and status != "REQUEST_SUCCEEDED":
        detail = "; ".join(str(msg) for msg in data.get("message", []))
        raise RuntimeError(f"BLS {status}: {detail}".rstrip(": "))

    series = data.get("Results", {}).get("series", [])
    if not series:
        return {}
    points = series[0].get("data", [])
    if not points:
        return {}
    latest = points[0]
    return {
        "series_id": series_id,
        "latest_employment": latest.get("value"),
        "period": latest.get("period"),
        "year": latest.get("year"),
        "label": "Water Transportation Employment (thousands)",
    }
def score(data: dict) -> int:
    """Return a 0-100 maritime job-market score for a ``run`` result.

    The score is a step function of ``maritime_presence_score``.
    """
    presence = data.get("maritime_presence_score", 0)
    # (minimum presence, score), checked from the highest tier down.
    tiers = ((60, 90), (40, 75), (20, 60), (10, 45))
    for minimum, tier_score in tiers:
        if presence >= minimum:
            return tier_score
    # Some presence, but below the lowest tier, still beats none at all.
    return 35 if presence > 0 else 20
+279
View File
@@ -0,0 +1,279 @@
"""Sub-agente: Valoración y mercado inmobiliario.
Fuentes: Zillow (scraping) + County Property Appraiser oficial de Florida.
Soporta todos los condados principales de Florida — se selecciona
automáticamente según el condado geocodificado.
"""
from __future__ import annotations
import re
import sys
import time
from pathlib import Path
import requests
from bs4 import BeautifulSoup
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(_PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(_PROJECT_ROOT))
# ── Mapa de condados → fetcher / sitio PA oficial ────────────────────────────
# Clave: fragmento del nombre del condado en minúsculas (como devuelve el geocoder)
# Valor: dict con 'fetcher' (función importable) o 'url' (sitio web PA para display)
COUNTY_PA_MAP = {
# Condados con fetcher dedicado en data_fetchers/
"broward": {"fetcher": "data_fetchers.pa_broward:fetch_pa_broward",
"name": "Broward County PA", "site": "https://bcpa.net"},
"miami-dade": {"fetcher": "data_fetchers.pa_miami_dade:fetch_pa_miami_dade",
"name": "Miami-Dade PA", "site": "https://www.miamidade.gov/Apps/PA/propertysearch"},
"palm beach": {"fetcher": "data_fetchers.pa_palm_beach:fetch_pa_palm_beach",
"name": "Palm Beach County PA", "site": "https://pbcpao.gov"},
"duval": {"fetcher": "data_fetchers.pa_duval:fetch_pa_duval",
"name": "Duval County PA (Jacksonville)", "site": "https://www.coj.net/departments/property-appraiser"},
# Condados con sitio PA oficial (scraping genérico o referencia)
"st. johns": {"site": "https://www.sjcpa.us",
"name": "St. Johns County PA (St. Augustine)"},
"saint johns":{"site": "https://www.sjcpa.us",
"name": "St. Johns County PA (St. Augustine)"},
"volusia": {"site": "https://vcpa.volusia.org",
"name": "Volusia County PA (Daytona Beach)"},
"orange": {"site": "https://www.ocpafl.org",
"name": "Orange County PA (Orlando)"},
"hillsborough":{"site": "https://www.hcpafl.org",
"name": "Hillsborough County PA (Tampa)"},
"pinellas": {"site": "https://www.pcpao.gov",
"name": "Pinellas County PA (St. Petersburg/Clearwater)"},
"seminole": {"site": "https://www.scpafl.org",
"name": "Seminole County PA (Sanford/Altamonte)"},
"osceola": {"site": "https://www.property-appraiser.org",
"name": "Osceola County PA (Kissimmee)"},
"brevard": {"site": "https://www.bcpao.us",
"name": "Brevard County PA (Melbourne/Cocoa)"},
"indian river":{"site": "https://www.ircpa.org",
"name": "Indian River County PA (Vero Beach)"},
"martin": {"site": "https://www.pa.martin.fl.us",
"name": "Martin County PA (Stuart/Hobe Sound)"},
"st. lucie": {"site": "https://www.paslc.gov",
"name": "St. Lucie County PA (Port St. Lucie/Fort Pierce)"},
"saint lucie":{"site": "https://www.paslc.gov",
"name": "St. Lucie County PA"},
"lee": {"site": "https://www.leepa.org",
"name": "Lee County PA (Fort Myers/Cape Coral)"},
"collier": {"site": "https://www.collierappraiser.com",
"name": "Collier County PA (Naples/Marco Island)"},
"charlotte": {"site": "https://www.ccappraiser.com",
"name": "Charlotte County PA (Port Charlotte/Punta Gorda)"},
"sarasota": {"site": "https://www.sc-pa.com",
"name": "Sarasota County PA"},
"manatee": {"site": "https://www.manateepao.gov",
"name": "Manatee County PA (Bradenton)"},
"polk": {"site": "https://www.polkpa.org",
"name": "Polk County PA (Lakeland/Winter Haven)"},
"pasco": {"site": "https://www.pascopa.com",
"name": "Pasco County PA (New Port Richey/Wesley Chapel)"},
"hernando": {"site": "https://www.hernandopa-fl.us",
"name": "Hernando County PA (Spring Hill/Brooksville)"},
"citrus": {"site": "https://www.citruspa.org",
"name": "Citrus County PA (Crystal River/Inverness)"},
"marion": {"site": "https://www.pa.marion.fl.us",
"name": "Marion County PA (Ocala)"},
"alachua": {"site": "https://www.acpafl.org",
"name": "Alachua County PA (Gainesville)"},
"putnam": {"site": "https://www.putnam-fl.com/pa",
"name": "Putnam County PA (Palatka)"},
"flagler": {"site": "https://www.flaglerpa.com",
"name": "Flagler County PA (Palm Coast/Flagler Beach)"},
"clay": {"site": "https://www.ccpao.com",
"name": "Clay County PA (Fleming Island/Orange Park)"},
"nassau": {"site": "https://www.nassauflpa.com",
"name": "Nassau County PA (Fernandina Beach/Yulee)"},
"baker": {"site": "https://www.bakerpa.com",
"name": "Baker County PA (Macclenny)"},
"columbia": {"site": "https://www.columbiapafl.com",
"name": "Columbia County PA (Lake City)"},
"leon": {"site": "https://www.leonpa.org",
"name": "Leon County PA (Tallahassee)"},
"escambia": {"site": "https://www.escpa.org",
"name": "Escambia County PA (Pensacola)"},
"santa rosa": {"site": "https://www.srcpa.org",
"name": "Santa Rosa County PA (Milton/Gulf Breeze)"},
"okaloosa": {"site": "https://www.okaloosapa.com",
"name": "Okaloosa County PA (Fort Walton Beach/Destin)"},
"walton": {"site": "https://www.waltonpa.com",
"name": "Walton County PA (Destin/30A/DeFuniak Springs)"},
"bay": {"site": "https://www.baycopa.com",
"name": "Bay County PA (Panama City)"},
"monroe": {"site": "https://www.mcpafl.org",
"name": "Monroe County PA (Florida Keys)"},
"lake": {"site": "https://www.lakepa.org",
"name": "Lake County PA (Leesburg/Tavares/Mount Dora)"},
"sumter": {"site": "https://www.sumterpa.com",
"name": "Sumter County PA (The Villages/Bushnell)"},
}
def _match_county(county: str) -> dict | None:
"""Encuentra el PA info para un condado dado."""
c = county.lower().strip()
# Coincidencia exacta primero
if c in COUNTY_PA_MAP:
return COUNTY_PA_MAP[c]
# Coincidencia parcial
for key, val in COUNTY_PA_MAP.items():
if key in c or c in key:
return val
return None
def run(lat: float, lon: float, address: str, county: str = "") -> dict:
    """Collect property-valuation data for a location (fail-soft).

    Combines Zillow listing statistics for the address ZIP with the county
    Property Appraiser (PA), when the county is mapped. Failures are recorded
    under ``errors`` instead of being raised.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Postal address (must contain the ZIP for the Zillow lookup).
        county: County name from the geocoder; empty when unknown.

    Returns:
        Dict with the valuation fields plus ``pa_site``, ``pa_name``,
        ``sources`` and ``errors``.
    """
    import importlib

    result = {
        "estimated_value": None,
        "price_per_sqft": None,
        "appreciation_1y": None,
        "appreciation_3y": None,
        "days_on_market": None,
        "median_list_price": None,
        "inventory": None,
        "county_assessed_value": None,
        "county_market_value": None,
        "pa_site": None,
        "pa_name": None,
        "sources": [],
        "errors": [],
    }

    # --- Zillow ---
    try:
        z = _zillow_neighborhood(lat, lon, address)
        found = {k: v for k, v in z.items() if v is not None}
        result.update(found)
        if found:
            result["sources"].append("Zillow")
        else:
            # The page loaded but nothing could be extracted: do not credit
            # Zillow as a source for data it did not provide.
            result["errors"].append("Zillow: sin datos de listados en la respuesta")
    except Exception as e:
        result["errors"].append(f"Zillow: {e}")

    # --- County Property Appraiser ---
    # Only look the county up when one was given. A blank county must never be
    # matched to a PA entry.
    pa_info = _match_county(county) if county and county.strip() else None
    if pa_info:
        result["pa_name"] = pa_info.get("name", "")
        result["pa_site"] = pa_info.get("site", "")
        fetcher_ref = pa_info.get("fetcher")
        if fetcher_ref:
            try:
                # "package.module:function" -> import the module, call the function.
                module_path, func_name = fetcher_ref.split(":")
                mod = importlib.import_module(module_path)
                fetch_fn = getattr(mod, func_name)
                pa = fetch_fn(address)
                if pa:
                    result["county_assessed_value"] = pa.get("assessed_value")
                    result["county_market_value"] = pa.get("market_value")
                    result["sources"].append(pa_info["name"])
            except Exception as e:
                result["errors"].append(f"{pa_info.get('name','PA')}: {e}")
        else:
            # No dedicated fetcher: list the official site as a reference only.
            result["sources"].append(f"{pa_info['name']} (referencia)")
    else:
        result["errors"].append(
            f"County Appraiser no mapeado para: '{county}'. "
            "Consulta manualemente en floridapropertytax.org"
        )
    return result
def _zillow_neighborhood(lat: float, lon: float, address: str) -> dict:
    """Scrape Zillow's ZIP search page for listing statistics.

    The page's ``<script type="application/json">`` blobs are searched for a
    ``listResults`` list, wherever it is nested.

    Args:
        lat: Latitude (not used; the lookup is ZIP-based).
        lon: Longitude (not used; the lookup is ZIP-based).
        address: Postal address; the last 5-digit group is taken as the ZIP.

    Returns:
        Dict that may contain ``median_list_price``, ``price_per_sqft``,
        ``days_on_market`` and ``inventory``; empty when no listings were found.

    Raises:
        ValueError: if the address contains no ZIP code.
        requests.RequestException: on network failure or a non-2xx response.
    """
    import json

    # Use the LAST 5-digit group: the first one is often the house number.
    zip_candidates = re.findall(r"\b(\d{5})\b", address)
    if not zip_candidates:
        raise ValueError("No se encontró ZIP code en la dirección")
    zip_code = zip_candidates[-1]

    url = f"https://www.zillow.com/homes/{zip_code}_rb/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    time.sleep(3)  # rate limiting
    r = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    def _number(raw):
        # Accept real numbers and plain money strings like "$450,000".
        # Anything else (None, "$1.2M", "Contact agent") yields None.
        if isinstance(raw, bool):
            return None
        if isinstance(raw, (int, float)):
            return raw
        if isinstance(raw, str):
            m = re.fullmatch(r"\$?\s*([\d,]+(?:\.\d+)?)\s*\+?", raw.strip())
            if m:
                value = float(m.group(1).replace(",", ""))
                return int(value) if value.is_integer() else value
        return None

    def _find_listings(node):
        # Iterative walk of the decoded JSON looking for a non-empty
        # "listResults" list at any depth.
        pending = [node]
        while pending:
            current = pending.pop()
            if isinstance(current, dict):
                hits = current.get("listResults")
                if isinstance(hits, list) and hits:
                    return hits
                pending.extend(current.values())
            elif isinstance(current, list):
                pending.extend(current)
        return []

    result = {}
    for script in soup.find_all("script", type="application/json"):
        try:
            data = json.loads(script.string or "")
        except ValueError:
            continue  # not JSON after all; try the next script tag
        props = [p for p in _find_listings(data) if isinstance(p, dict)]
        if not props:
            continue

        # NOTE(review): "unformattedPrice" is assumed to be the numeric twin
        # of the display "price" -- confirm against a live payload.
        priced = []
        for p in props:
            price = _number(p.get("unformattedPrice")) or _number(p.get("price"))
            if price:
                priced.append((price, _number(p.get("area"))))

        prices = sorted(price for price, _ in priced)
        if prices:
            result["median_list_price"] = prices[len(prices) // 2]
        per_sqft = [price / area for price, area in priced if area]
        if per_sqft:
            result["price_per_sqft"] = round(sum(per_sqft) / len(per_sqft))
        doms = [d for d in (_number(p.get("daysOnZillow")) for p in props) if d]
        if doms:
            result["days_on_market"] = round(sum(doms) / len(doms))
        result["inventory"] = len(props)
    return result
def score(data: dict) -> int:
    """Return a 0-100 real-estate market score for a ``run`` result.

    Starts at 50 and adjusts for 1-year appreciation, days on market and price
    per square foot. Indicators that are ``None`` contribute nothing. Empty
    ``data`` returns 50.
    """
    if not data:
        return 50

    adjustments = []

    # One-year appreciation, when available.
    growth = data.get("appreciation_1y")
    if growth is not None:
        adjustments.append(
            20 if growth >= 10 else 12 if growth >= 5 else 5 if growth >= 0 else -10
        )

    # Days on market: fewer days means a more active market.
    days = data.get("days_on_market")
    if days is not None:
        adjustments.append(
            15 if days <= 20 else 8 if days <= 40 else 0 if days <= 60 else -10
        )

    # Price per square foot as a demand indicator.
    per_sqft = data.get("price_per_sqft")
    if per_sqft is not None:
        adjustments.append(10 if per_sqft >= 300 else 5 if per_sqft >= 200 else 0)

    return min(100, max(0, 50 + sum(adjustments)))
+151
View File
@@ -0,0 +1,151 @@
"""Sub-agente: Escuelas cercanas.
Fuente: GreatSchools.org (scraping) + Overpass API como fallback.
"""
from __future__ import annotations
import time
import requests
from bs4 import BeautifulSoup
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
# Overpass API endpoint used by the OpenStreetMap fallback for schools.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
def run(lat: float, lon: float, address: str) -> dict:
    """Collect nearby schools and their ratings (fail-soft).

    GreatSchools is tried first. If it yields no schools, OpenStreetMap is
    used as an unrated fallback. Failures are recorded under ``errors``.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        address: Accepted for a uniform sub-agent signature; not used here.

    Returns:
        Dict with ``schools``, ``avg_rating``, ``best_elementary``,
        ``best_middle``, ``best_high``, ``sources`` and ``errors``.
    """
    report = {
        "schools": [],
        "avg_rating": None,
        "best_elementary": None,
        "best_middle": None,
        "best_high": None,
        "sources": [],
        "errors": [],
    }

    # Primary source: GreatSchools.
    try:
        report["schools"] = _greatschools(lat, lon)
        report["sources"].append("GreatSchools.org")
    except Exception as exc:
        report["errors"].append(f"GreatSchools: {exc}")

    # Fallback: OpenStreetMap (lists schools, without ratings).
    if not report["schools"]:
        try:
            report["schools"] = _overpass_schools(lat, lon)
            report["sources"].append("OpenStreetMap/Overpass")
        except Exception as exc:
            report["errors"].append(f"Overpass schools: {exc}")

    # Average rating and best school per level, over rated schools only.
    rated = [school for school in report["schools"] if school.get("rating")]
    if rated:
        rating_sum = sum(school["rating"] for school in rated)
        report["avg_rating"] = round(rating_sum / len(rated), 1)
        for level, slot in (
            ("elementary", "best_elementary"),
            ("middle", "best_middle"),
            ("high", "best_high"),
        ):
            candidates = [school for school in rated if school.get("level") == level]
            if candidates:
                report[slot] = max(candidates, key=lambda school: school["rating"])
    return report
def _greatschools(lat: float, lon: float) -> list:
    """Scrape the GreatSchools search page for schools near a point.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        List of ``{"name", "rating", "level", "source"}`` dicts for at most the
        first 15 cards found. ``rating`` is ``None`` when it cannot be parsed.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    url = f"https://www.greatschools.org/search/search.page?lat={lat}&lon={lon}&distance=3&gradeLevels=e,m,h"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml",
    }
    time.sleep(2)
    response = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")

    # Level -> text markers that identify it; the first matching level wins.
    level_markers = (
        ("elementary", ("elementary", "k-5", "k-6")),
        ("middle", ("middle", "6-8")),
        ("high", ("high", "9-12")),
    )

    found = []
    for card in page.select(".school-card, [data-school-id]")[:15]:
        name_el = card.select_one(".school-name, h2, h3")
        rating_el = card.select_one(".gs-rating, [class*='rating']")
        level_el = card.select_one(".school-type, .grade-range")

        if name_el:
            name = name_el.get_text(strip=True)
        else:
            name = "Desconocida"

        rating = None
        if rating_el:
            try:
                # The number before any "/" is taken as the rating.
                rating = int(rating_el.get_text(strip=True).split("/")[0])
            except (ValueError, AttributeError):
                rating = None

        level_text = level_el.get_text(strip=True).lower() if level_el else ""
        level = "other"
        for candidate, markers in level_markers:
            if any(marker in level_text for marker in markers):
                level = candidate
                break

        found.append(
            {"name": name, "rating": rating, "level": level, "source": "GreatSchools"}
        )
    return found
def _overpass_schools(lat: float, lon: float, radius_m: int = 4800) -> list:
    """Fallback: list schools from OpenStreetMap via the Overpass API.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Search radius in metres.

    Returns:
        List of unrated ``{"name", "rating", "level", "source"}`` dicts for at
        most the first 20 elements returned.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    query = f"""
[out:json][timeout:25];
(
node["amenity"="school"](around:{radius_m},{lat},{lon});
way["amenity"="school"](around:{radius_m},{lat},{lon});
);
out body;
"""
    time.sleep(1)
    response = requests.post(
        OVERPASS_URL,
        data={"data": query},
        headers={"User-Agent": USER_AGENT},
        timeout=30,
    )
    response.raise_for_status()
    first_elements = response.json().get("elements", [])[:20]
    return [
        {
            "name": element.get("tags", {}).get("name", "Escuela sin nombre"),
            "rating": None,
            "level": "other",
            "source": "OSM",
        }
        for element in first_elements
    ]
def score(data: dict) -> int:
    """Return a 0-100 schools score from the average school rating.

    Without an average rating the score is 55 when any school was found and
    45 when none was.
    """
    average = data.get("avg_rating")
    if average is None:
        # Schools exist but are unrated -> slightly positive; none -> slightly negative.
        return 55 if data.get("schools", []) else 45

    # Map the average rating onto the 0-100 scale, highest tier first.
    tiers = ((9, 95), (8, 85), (7, 75), (6, 65), (5, 55), (4, 40), (3, 30))
    return next(
        (tier_score for minimum, tier_score in tiers if average >= minimum), 20
    )