86 lines
3.0 KiB
Python
86 lines
3.0 KiB
Python
"""US Census Geocoder - address -> lat/lng/county/state.
|
|
|
|
Free API, no key required, no documented rate limits (reasonable use).
Documentation: https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf

Limitation: USA ONLY (includes PR, GU, AS, MP, VI).

Returns a dict with:
    matched_address, lat, lng, city, state, zip, county_name, county_fips,
    state_fips, tract_geoid, tract_code, tract_name, county_code_only
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import requests
|
|
|
|
from .base import FetcherError, USER_AGENT, DEFAULT_TIMEOUT
|
|
|
|
|
|
# Endpoint queried by fetch_geocode(). Per its path, it is the one-line-address
# search of the Census Geocoder's "geographies" service.
CENSUS_URL = "https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress"
|
|
|
|
|
|
def fetch_geocode(address: str) -> dict:
    """Geocode a US address with the Census Geocoder.

    Sends one GET request to ``CENSUS_URL`` asking for the Census Tracts,
    Counties and 2020 Census Blocks layers, then flattens the first address
    match of the response into a plain dict.

    Args:
        address: Free-form one-line address. Surrounding whitespace is
            stripped before the request is sent.

    Returns:
        A dict with the keys ``matched_address``, ``lat``, ``lng``, ``city``,
        ``state``, ``zip``, ``county_name``, ``county_fips``, ``state_fips``,
        ``tract_geoid``, ``tract_code``, ``tract_name`` and
        ``county_code_only``. Any value the response does not provide is
        ``None``.

    Raises:
        FetcherError: If the address is empty or shorter than 5 characters
            once stripped, the HTTP request fails, the body is not valid
            JSON, or the response holds no usable address match.
    """
    if not address or len(address.strip()) < 5:
        raise FetcherError("address vacio o muy corto")

    params = {
        "address": address.strip(),
        "benchmark": "Public_AR_Current",
        "vintage": "Current_Current",
        "format": "json",
        # Census Tracts are requested in addition to Counties because the
        # tract fields feed neighborhood_class.
        "layers": "Census Tracts,Counties,2020 Census Blocks",
    }
    headers = {"User-Agent": USER_AGENT}

    try:
        r = requests.get(CENSUS_URL, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException as e:
        raise FetcherError(f"HTTP error: {e}") from e

    try:
        data = r.json()
    except ValueError as e:
        raise FetcherError(f"JSON parse error: {e}") from e

    # A valid JSON body is not necessarily an object, and "result" or
    # "addressMatches" may be null. Treat every unexpected shape as "no match"
    # so callers only ever have to catch FetcherError.
    result = data.get("result") if isinstance(data, dict) else None
    matches = result.get("addressMatches") if isinstance(result, dict) else None
    m = matches[0] if isinstance(matches, list) and matches else None
    if not isinstance(m, dict):
        raise FetcherError(f"No geocode match for: {address!r}")

    coords = m.get("coordinates", {}) or {}
    comp = m.get("addressComponents", {}) or {}
    geos = m.get("geographies", {}) or {}

    # Counties layer: its key varies between vintages, sometimes "Counties",
    # sometimes carrying a prefix/suffix.
    counties = (
        geos.get("Counties")
        or geos.get("2020 Census Counties")
        or geos.get("County Subdivisions")
        or []
    )
    county = counties[0] if counties else {}

    # Census Tracts: neighborhood-level granularity (~4K people per tract).
    tracts = geos.get("Census Tracts") or geos.get("2020 Census Tracts") or []
    tract = tracts[0] if tracts else {}

    # Fall back to the tract record for the state/county codes so all three
    # FIPS-related fields degrade the same way when the county layer is absent.
    state_fips = county.get("STATE") or tract.get("STATE")
    county_code = county.get("COUNTY") or tract.get("COUNTY")

    # county_fips is meant to be the 5-digit state+county code (e.g. "12086").
    # When GEOID is missing or has another length, rebuild it from its parts.
    # NOTE(review): a "County Subdivisions" record presumably carries a longer
    # GEOID and a subdivision NAME; county_name is left as-is in that case.
    county_fips = county.get("GEOID")
    geoid_is_county = isinstance(county_fips, str) and len(county_fips) == 5
    if not geoid_is_county and state_fips and county_code:
        county_fips = f"{state_fips}{county_code}"

    return {
        "matched_address": m.get("matchedAddress"),
        "lat": coords.get("y"),
        "lng": coords.get("x"),
        "city": comp.get("city"),
        "state": comp.get("state"),
        "zip": comp.get("zip"),
        "county_name": county.get("NAME") or county.get("BASENAME"),
        "county_fips": county_fips,  # e.g. "12086"
        "state_fips": state_fips,  # e.g. "12"
        # Tract info used for neighborhood_class.
        "tract_geoid": tract.get("GEOID"),  # e.g. "12086007608"
        "tract_code": tract.get("TRACT"),  # e.g. "007608" (6 digits, no state/county)
        "tract_name": tract.get("NAME"),  # e.g. "Census Tract 76.08"
        "county_code_only": county_code,  # e.g. "086" (3-digit county only)
    }
|