"""Sub-agente: Escuelas cercanas. Fuente: GreatSchools.org (scraping) + Overpass API como fallback. """ from __future__ import annotations import time import requests from bs4 import BeautifulSoup from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT OVERPASS_URL = "https://overpass-api.de/api/interpreter" def run(lat: float, lon: float, address: str) -> dict: result = { "schools": [], "avg_rating": None, "best_elementary": None, "best_middle": None, "best_high": None, "sources": [], "errors": [], } # --- GreatSchools --- try: gs = _greatschools(lat, lon) result["schools"] = gs result["sources"].append("GreatSchools.org") except Exception as e: result["errors"].append(f"GreatSchools: {e}") # --- Fallback: Overpass (cuenta escuelas sin rating) --- if not result["schools"]: try: op = _overpass_schools(lat, lon) result["schools"] = op result["sources"].append("OpenStreetMap/Overpass") except Exception as e: result["errors"].append(f"Overpass schools: {e}") # Calcular promedio y mejores escuelas rated = [s for s in result["schools"] if s.get("rating")] if rated: result["avg_rating"] = round(sum(s["rating"] for s in rated) / len(rated), 1) elementary = [s for s in rated if s.get("level") == "elementary"] middle = [s for s in rated if s.get("level") == "middle"] high = [s for s in rated if s.get("level") == "high"] if elementary: result["best_elementary"] = max(elementary, key=lambda x: x["rating"]) if middle: result["best_middle"] = max(middle, key=lambda x: x["rating"]) if high: result["best_high"] = max(high, key=lambda x: x["rating"]) return result def _greatschools(lat: float, lon: float) -> list: """Scraping de GreatSchools para escuelas cercanas.""" url = f"https://www.greatschools.org/search/search.page?lat={lat}&lon={lon}&distance=3&gradeLevels=e,m,h" headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Accept": "text/html,application/xhtml+xml", } time.sleep(2) r = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT) r.raise_for_status() soup = BeautifulSoup(r.text, "html.parser") schools = [] for card in soup.select(".school-card, [data-school-id]")[:15]: name_el = card.select_one(".school-name, h2, h3") rating_el = card.select_one(".gs-rating, [class*='rating']") level_el = card.select_one(".school-type, .grade-range") name = name_el.get_text(strip=True) if name_el else "Desconocida" try: rating = int(rating_el.get_text(strip=True).split("/")[0]) if rating_el else None except (ValueError, AttributeError): rating = None level_text = level_el.get_text(strip=True).lower() if level_el else "" if "elementary" in level_text or "k-5" in level_text or "k-6" in level_text: level = "elementary" elif "middle" in level_text or "6-8" in level_text: level = "middle" elif "high" in level_text or "9-12" in level_text: level = "high" else: level = "other" schools.append({"name": name, "rating": rating, "level": level, "source": "GreatSchools"}) return schools def _overpass_schools(lat: float, lon: float, radius_m: int = 4800) -> list: """Fallback: escuelas desde OpenStreetMap.""" query = f""" [out:json][timeout:25]; ( node["amenity"="school"](around:{radius_m},{lat},{lon}); way["amenity"="school"](around:{radius_m},{lat},{lon}); ); out body; """ time.sleep(1) r = requests.post(OVERPASS_URL, data={"data": query}, headers={"User-Agent": USER_AGENT}, timeout=30) r.raise_for_status() elements = r.json().get("elements", []) schools = [] for el in elements[:20]: tags = el.get("tags", {}) name = tags.get("name", "Escuela sin nombre") schools.append({"name": name, "rating": None, "level": "other", "source": "OSM"}) return schools def score(data: dict) -> int: """Score 0-100 basado en rating promedio de escuelas.""" avg = data.get("avg_rating") if avg is None: count = len(data.get("schools", [])) # Si hay escuelas pero sin rating, score neutral-positivo return 45 if count == 0 else 55 # GreatSchools rating 1-10 → score 0-100 if avg >= 9: return 95 elif avg >= 8: return 85 elif avg >= 7: return 75 elif avg >= 6: return 65 elif avg >= 5: return 55 elif avg >= 4: return 40 elif avg >= 3: return 30 else: return 20