206 lines
6.1 KiB
Python
206 lines
6.1 KiB
Python
"""Sub-agente: Demografía.
|
|
|
|
Fuente: Census ACS API (key gratuita en api.census.gov/data/key_signup.html).
|
|
Sin key, usa datos estimados por ZIP desde datos públicos del Census.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import time
|
|
import requests
|
|
from data_fetchers.base import USER_AGENT, DEFAULT_TIMEOUT
|
|
|
|
# Census API key, read once at import time; an empty string means "not configured".
CENSUS_KEY = os.environ.get("CENSUS_API_KEY", "")
# Endpoint of the 2022 ACS 5-Year dataset used by every query in this module.
ACS_BASE = "https://api.census.gov/data/2022/acs/acs5"

# ACS5 variables to request, mapped to the internal field name each one feeds.
# Insertion order is the order in which the variables are sent to the API.
ACS_VARS = dict(
    (
        ("B19013_001E", "median_household_income"),
        ("B01002_001E", "median_age"),
        ("B23025_005E", "unemployed"),
        ("B23025_002E", "labor_force"),
        ("B15003_022E", "bachelors_degree"),
        ("B15003_001E", "population_25plus"),
        ("B03002_003E", "white_non_hispanic"),
        ("B03002_004E", "black"),
        ("B03002_012E", "hispanic"),
        ("B03002_006E", "asian"),
        ("B01003_001E", "total_population"),
    )
)
|
|
|
|
|
|
def run(lat: float, lon: float, address: str, tract_geoid: str = "",
        state_fips: str = "", county_fips: str = "") -> dict:
    """Collect demographic indicators for a location.

    The keyed Census ACS query is tried first; it needs ``CENSUS_KEY`` plus the
    state and county FIPS codes. If that step yields no data, the key-less
    ZIP-level query is used as a fallback, provided a ZIP code can be found in
    ``address``.

    Args:
        lat: Latitude of the location (currently unused by this function).
        lon: Longitude of the location (currently unused by this function).
        address: Free-form address; a 5-digit ZIP code is extracted from it.
        tract_geoid: Census tract GEOID, forwarded to the ACS query.
        state_fips: State FIPS code, forwarded to the ACS query.
        county_fips: County FIPS code, forwarded to the ACS query.

    Returns:
        A dict with the keys ``median_household_income``, ``median_age``,
        ``unemployment_rate``, ``education_bachelors_pct``, ``ethnicity``,
        ``total_population``, ``sources`` (names of the data sources that
        actually contributed data) and ``errors`` (human-readable messages).
        Fetch failures are reported in ``errors`` instead of being raised.
    """
    result = {
        "median_household_income": None,
        "median_age": None,
        "unemployment_rate": None,
        "education_bachelors_pct": None,
        "ethnicity": {},
        "total_population": None,
        "sources": [],
        "errors": [],
    }

    # The ZIP sits at the end of a US address, so take the LAST 5-digit group;
    # the first one is frequently the street number ("12345 Main St, ... 90210").
    zip_candidates = re.findall(r"\b\d{5}\b", address or "")
    zip_code = zip_candidates[-1] if zip_candidates else ""

    # --- Census ACS API ---
    if not CENSUS_KEY:
        result["errors"].append(
            "Census API key no configurada. Agregar CENSUS_API_KEY en .env. "
            "Obtener en: https://api.census.gov/data/key_signup.html"
        )
    elif not (state_fips and county_fips):
        # The key is present; blaming the key here would send the user on a
        # wild-goose chase, so report the real reason the query was skipped.
        result["errors"].append(
            "Census ACS: faltan state_fips/county_fips; se omite la consulta ACS."
        )
    else:
        try:
            acs = _census_acs(state_fips, county_fips, tract_geoid)
        except Exception as e:  # best-effort: any fetch failure becomes an error entry
            result["errors"].append(f"Census ACS: {e}")
        else:
            if acs:
                result.update(acs)
                result["sources"].append("Census ACS 5-Year")
            else:
                # An empty answer must not count as a source, otherwise the
                # ZIP fallback below would be skipped with no data at all.
                result["errors"].append("Census ACS: respuesta sin datos")

    # --- Fallback: Census ZIP (no key) ---
    if not result["sources"] and zip_code:
        try:
            zcta = _census_zip_no_key(zip_code)
        except Exception as e:  # best-effort, same policy as above
            result["errors"].append(f"Census ZIP fallback: {e}")
        else:
            if zcta:
                result.update(zcta)
                result["sources"].append("Census ZIP (estimado)")
            else:
                result["errors"].append("Census ZIP fallback: respuesta sin datos")

    return result
|
|
|
|
|
|
def _census_acs(state_fips: str, county_fips: str, tract_geoid: str) -> dict:
    """Query the Census ACS 5-Year API for a census tract, or its county.

    Args:
        state_fips: State FIPS code; only the first two characters are used.
        county_fips: County FIPS code; only the last three characters are used.
        tract_geoid: Tract GEOID; its last six characters are used as the tract
            code. When shorter than six characters, the county-level aggregate
            is requested instead.

    Returns:
        The output of ``_process_acs`` for the first data row, or ``{}`` when
        the API answered without any data row.

    Raises:
        Whatever ``requests`` raises on network or HTTP errors (the response is
        checked with ``raise_for_status``), and JSON decoding errors.
    """
    # Census geography codes are fixed-width and zero-padded ("06", "037");
    # pad so that inputs like "6" / "37" still address the right geography.
    state = state_fips[:2].zfill(2)
    county = county_fips[-3:].zfill(3)

    if len(tract_geoid) >= 6:
        # Tract GEOID layout is state(2) + county(3) + tract(6) = 11 digits,
        # so the tract code is the trailing six characters.
        geo_for = f"tract:{tract_geoid[-6:]}"
        geo_in = f"state:{state} county:{county}"
    else:
        # No usable tract: ask for the county aggregate. A "tract:*" wildcard
        # would return every tract in the county, and taking the first row
        # would report one arbitrary tract as if it were the location.
        geo_for = f"county:{county}"
        geo_in = f"state:{state}"

    params = {
        "get": ",".join(ACS_VARS),
        "for": geo_for,
        "in": geo_in,
        "key": CENSUS_KEY,
    }
    headers = {"User-Agent": USER_AGENT}
    time.sleep(0.5)  # short pause before each request
    r = requests.get(ACS_BASE, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()
    if not r.content:
        # An empty body (e.g. a "no content" answer) cannot be parsed as JSON.
        return {}
    rows = r.json()
    if len(rows) < 2:
        return {}

    header, vals = rows[0], rows[1]
    data = {}
    for column, raw in zip(header, vals):
        if column not in ACS_VARS:
            continue  # geography columns (state/county/tract) are not estimates
        value = _safe_int(raw)
        # The Census API encodes "estimate not available" as large negative
        # numbers (e.g. -666666666). None of the requested variables can be
        # legitimately negative, so treat any negative value as missing.
        if value is not None and value < 0:
            value = None
        data[ACS_VARS[column]] = value

    return _process_acs(data)
|
|
|
|
|
|
def _census_zip_no_key(zip_code: str) -> dict:
    """Fetch ZIP-level (ZCTA) figures from the ACS endpoint without an API key.

    Args:
        zip_code: 5-digit ZIP code used as the "zip code tabulation area".

    Returns:
        A dict with ``median_household_income``, ``median_age`` and
        ``total_population`` (each ``None`` when the value is missing or not
        parseable), or ``{}`` when the API answered without any data row.

    Raises:
        Whatever ``requests`` raises on network or HTTP errors (the response is
        checked with ``raise_for_status``), and JSON decoding errors.
    """
    params = {
        "get": "B19013_001E,B01002_001E,B01003_001E",
        "for": f"zip code tabulation area:{zip_code}",
    }
    r = requests.get(ACS_BASE, params=params,
                     headers={"User-Agent": USER_AGENT}, timeout=DEFAULT_TIMEOUT)
    r.raise_for_status()
    if not r.content:
        # An empty body (e.g. a "no content" answer) cannot be parsed as JSON.
        return {}
    rows = r.json()
    if len(rows) < 2:
        return {}

    raw = dict(zip(rows[0], rows[1]))

    def _estimate(variable: str):
        # The Census API encodes "estimate not available" as large negative
        # numbers (e.g. -666666666); income, age and population can never be
        # negative, so any negative value is reported as missing.
        value = _safe_int(raw.get(variable))
        if value is None or value < 0:
            return None
        return value

    return {
        "median_household_income": _estimate("B19013_001E"),
        "median_age": _estimate("B01002_001E"),
        "total_population": _estimate("B01003_001E"),
    }
|
|
|
|
|
|
def _process_acs(data: dict) -> dict:
|
|
total_pop = data.get("total_population") or 1
|
|
labor = data.get("labor_force") or 1
|
|
pop25 = data.get("population_25plus") or 1
|
|
|
|
unemployed = data.get("unemployed") or 0
|
|
bachelors = data.get("bachelors_degree") or 0
|
|
white = data.get("white_non_hispanic") or 0
|
|
black = data.get("black") or 0
|
|
hispanic = data.get("hispanic") or 0
|
|
asian = data.get("asian") or 0
|
|
|
|
ethnicity = {
|
|
"white_non_hispanic_pct": round(white / total_pop * 100, 1),
|
|
"black_pct": round(black / total_pop * 100, 1),
|
|
"hispanic_pct": round(hispanic / total_pop * 100, 1),
|
|
"asian_pct": round(asian / total_pop * 100, 1),
|
|
"other_pct": max(0, round((total_pop - white - black - hispanic - asian) / total_pop * 100, 1)),
|
|
}
|
|
|
|
return {
|
|
"median_household_income": data.get("median_household_income"),
|
|
"median_age": data.get("median_age"),
|
|
"unemployment_rate": round(unemployed / labor * 100, 1) if labor > 0 else None,
|
|
"education_bachelors_pct": round(bachelors / pop25 * 100, 1) if pop25 > 0 else None,
|
|
"ethnicity": ethnicity,
|
|
"total_population": total_pop,
|
|
}
|
|
|
|
|
|
def _safe_int(v) -> int | None:
|
|
try:
|
|
return int(v)
|
|
except (TypeError, ValueError):
|
|
return None
|
|
|
|
|
|
def score(data: dict) -> int:
    """Return a 0-100 score derived from socioeconomic indicators.

    Starts from a neutral 50 and adds or subtracts points according to the
    band each available indicator falls into: ``median_household_income``,
    ``unemployment_rate`` and ``education_bachelors_pct``. Indicators that are
    ``None`` or absent leave the score untouched.
    """

    def _band_points(value, bands, otherwise, higher_is_better):
        # Bands are checked in order; the first bound the value satisfies wins.
        for bound, points in bands:
            satisfied = value >= bound if higher_is_better else value <= bound
            if satisfied:
                return points
        return otherwise

    total = 50

    household_income = data.get("median_household_income")
    if household_income is not None:
        total += _band_points(
            household_income,
            ((100000, 20), (75000, 12), (50000, 5), (35000, -5)),
            -15,
            True,
        )

    unemployment = data.get("unemployment_rate")
    if unemployment is not None:
        total += _band_points(
            unemployment,
            ((3, 15), (5, 8), (8, 0), (12, -8)),
            -15,
            False,
        )

    bachelors_pct = data.get("education_bachelors_pct")
    if bachelors_pct is not None:
        total += _band_points(
            bachelors_pct,
            ((50, 10), (35, 5), (20, 0)),
            -5,
            True,
        )

    # Clamp to the documented 0-100 range.
    return min(100, max(0, total))
|