Files
2026-07-03 12:24:58 -04:00

152 lines
4.8 KiB
Python

"""Sub-agente: Escuelas cercanas.
Fuente: GreatSchools.org (scraping) + Overpass API como fallback.
"""
from __future__ import annotations
import time
import requests
from bs4 import BeautifulSoup
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
# Overpass API endpoint; _overpass_schools posts its OpenStreetMap query here.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
def run(lat: float, lon: float, address: str) -> dict:
    """Collect nearby schools and summarise their ratings.

    GreatSchools is queried first. The Overpass/OpenStreetMap lookup runs
    only when the school list is still empty afterwards. An exception from
    either lookup is recorded as a message in ``errors`` rather than raised.

    Args:
        lat: Latitude forwarded to both lookups.
        lon: Longitude forwarded to both lookups.
        address: Accepted but not read by this function.

    Returns:
        A dict with the keys ``schools``, ``avg_rating``,
        ``best_elementary``, ``best_middle``, ``best_high``, ``sources``
        and ``errors``. ``avg_rating`` and the ``best_*`` entries stay
        ``None`` when no school has a truthy rating.
    """
    result = {
        "schools": [],
        "avg_rating": None,
        "best_elementary": None,
        "best_middle": None,
        "best_high": None,
        "sources": [],
        "errors": [],
    }

    # Primary source: GreatSchools.
    # NOTE(review): the source label is recorded even when the lookup
    # returns an empty list - confirm that this is intended.
    try:
        scraped = _greatschools(lat, lon)
    except Exception as e:
        result["errors"].append(f"GreatSchools: {e}")
    else:
        result["schools"] = scraped
        result["sources"].append("GreatSchools.org")

    # Fallback: Overpass (lists schools, but without ratings).
    if not result["schools"]:
        try:
            from_osm = _overpass_schools(lat, lon)
        except Exception as e:
            result["errors"].append(f"Overpass schools: {e}")
        else:
            result["schools"] = from_osm
            result["sources"].append("OpenStreetMap/Overpass")

    # Average rating and best school per level, over rated schools only.
    with_rating = [school for school in result["schools"] if school.get("rating")]
    if not with_rating:
        return result

    rating_total = sum(school["rating"] for school in with_rating)
    result["avg_rating"] = round(rating_total / len(with_rating), 1)

    level_to_key = (
        ("elementary", "best_elementary"),
        ("middle", "best_middle"),
        ("high", "best_high"),
    )
    for level, result_key in level_to_key:
        candidates = [
            school for school in with_rating if school.get("level") == level
        ]
        if candidates:
            result[result_key] = max(
                candidates, key=lambda school: school["rating"]
            )
    return result
def _greatschools(lat: float, lon: float) -> list:
    """Scrape the GreatSchools search page for schools near a coordinate.

    Sleeps two seconds before issuing the request, then parses at most the
    first 15 matching school cards from the returned HTML.

    Args:
        lat: Latitude inserted into the search URL.
        lon: Longitude inserted into the search URL.

    Returns:
        A list of dicts with the keys ``name``, ``rating`` (an int, or
        ``None`` when missing or unparsable), ``level`` (``"elementary"``,
        ``"middle"``, ``"high"`` or ``"other"``) and ``source``.

    Raises:
        Whatever ``requests.get`` raises, plus the error raised by
        ``raise_for_status`` for an unsuccessful HTTP status.
    """
    search_url = f"https://www.greatschools.org/search/search.page?lat={lat}&lon={lon}&distance=3&gradeLevels=e,m,h"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml",
    }

    # Substrings that identify each level, checked in this order.
    level_markers = (
        ("elementary", ("elementary", "k-5", "k-6")),
        ("middle", ("middle", "6-8")),
        ("high", ("high", "9-12")),
    )

    time.sleep(2)
    response = requests.get(
        search_url, headers=request_headers, timeout=DEFAULT_TIMEOUT
    )
    response.raise_for_status()
    document = BeautifulSoup(response.text, "html.parser")

    found = []
    cards = document.select(".school-card, [data-school-id]")
    for card in cards[:15]:
        name_node = card.select_one(".school-name, h2, h3")
        rating_node = card.select_one(".gs-rating, [class*='rating']")
        level_node = card.select_one(".school-type, .grade-range")

        if name_node:
            name = name_node.get_text(strip=True)
        else:
            name = "Desconocida"

        # Only the part before the first "/" is parsed as the rating;
        # anything that is not an integer yields no rating.
        rating = None
        if rating_node:
            try:
                rating = int(rating_node.get_text(strip=True).split("/")[0])
            except (ValueError, AttributeError):
                rating = None

        grade_text = ""
        if level_node:
            grade_text = level_node.get_text(strip=True).lower()
        level = next(
            (
                label
                for label, markers in level_markers
                if any(marker in grade_text for marker in markers)
            ),
            "other",
        )

        found.append(
            {"name": name, "rating": rating, "level": level, "source": "GreatSchools"}
        )
    return found
def _overpass_schools(lat: float, lon: float, radius_m: int = 4800) -> list:
    """Fallback lookup: list schools from OpenStreetMap via Overpass.

    Sleeps one second before posting the query, then converts at most the
    first 20 returned elements into school entries.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Value used in the query's ``around`` filter.

    Returns:
        A list of dicts with the keys ``name``, ``rating`` (always
        ``None``), ``level`` (always ``"other"``) and ``source``.

    Raises:
        Whatever ``requests.post`` raises, plus the error raised by
        ``raise_for_status`` for an unsuccessful HTTP status.
    """
    # Nodes and ways tagged amenity=school around the given point.
    query = f"""
[out:json][timeout:25];
(
node["amenity"="school"](around:{radius_m},{lat},{lon});
way["amenity"="school"](around:{radius_m},{lat},{lon});
);
out body;
"""
    time.sleep(1)
    response = requests.post(
        OVERPASS_URL,
        data={"data": query},
        headers={"User-Agent": USER_AGENT},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    return [
        {
            "name": element.get("tags", {}).get("name", "Escuela sin nombre"),
            "rating": None,
            "level": "other",
            "source": "OSM",
        }
        for element in payload.get("elements", [])[:20]
    ]
def score(data: dict) -> int:
    """Return a 0-100 score derived from the average school rating.

    Args:
        data: Mapping that may contain ``avg_rating`` (a number or
            ``None``) and ``schools`` (a list or ``None``).

    Returns:
        When ``avg_rating`` is missing or ``None``: 55 if at least one
        school is listed, otherwise 45. When it is set, the score of the
        highest band whose lower bound the average reaches:
        9 -> 95, 8 -> 85, 7 -> 75, 6 -> 65, 5 -> 55, 4 -> 40, 3 -> 30,
        and 20 for anything below 3.
    """
    avg = data.get("avg_rating")
    if avg is None:
        # ``or []`` also covers an explicit ``"schools": None``, which would
        # otherwise make ``len`` raise TypeError.
        schools = data.get("schools") or []
        # Schools found but none rated: neutral-positive score.
        return 55 if len(schools) > 0 else 45

    # (lower bound of average rating, score), highest band first.
    bands = (
        (9, 95),
        (8, 85),
        (7, 75),
        (6, 65),
        (5, 55),
        (4, 40),
        (3, 30),
    )
    for lower_bound, points in bands:
        if avg >= lower_bound:
            return points
    return 20