feat: EmailManager initial commit — Python Google APIs (Gmail, Calendar), Telegram Bot API, Ollama (local LLM), Craigslist RSS, OpenStreetMap/Overpass

This commit is contained in:
2026-07-03 12:15:46 -04:00
commit 8e3216957c
9 changed files with 2407 additions and 0 deletions
+9
View File
@@ -0,0 +1,9 @@
# Copy this file to .env and fill in real values.
# NEVER commit .env to git.
# Telegram bot credentials
TELEGRAM_TOKEN=your_telegram_bot_token_here
TELEGRAM_CHAT_ID=your_telegram_chat_id_here
# Google Places API key (currently unused — app uses OSM/Overpass)
GOOGLE_PLACES_KEY=your_google_places_api_key_here
+51
View File
@@ -0,0 +1,51 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyc
*.pyd
.Python
*.egg
*.egg-info/
# Virtual environments
.venv/
venv/
env/
ENV/
# Environment / secrets — NEVER commit these
.env
.env.*
!.env.example
# Google OAuth credentials and tokens — contain secrets
credentials.json
token_*.pickle
# State / cache files
last_check.json
# Backup files
*.bak
*.bak2
# Logs and output
*.log
*.txt
!requirements.txt
# Build / dist
build/
dist/
output/
# Type checking / linting caches
.mypy_cache/
.pytest_cache/
.ruff_cache/
# OS artifacts
.DS_Store
Thumbs.db
desktop.ini
+3
View File
@@ -0,0 +1,3 @@
' Launch email_assistant.py through the "python" command found on PATH.
' Window style 0 hides the console window; the final False makes Run return
' immediately instead of waiting for the Python process to exit.
Set WshShell = CreateObject("WScript.Shell")
WshShell.Run "python ""D:\Proyectos Software\EmailManager\email_assistant.py""", 0, False
Set WshShell = Nothing
+31
View File
@@ -0,0 +1,31 @@
FROM llama3.1:8b
SYSTEM """
Eres el asistente personal de Alvaro Romero. Siempre respondes en español, de forma concisa y útil.
PERFIL DE ALVARO:
- Busca trabajo activamente en sector marítimo e ingeniería
- Conductor Uber y Lyft (tiene Checkr, Lyft Direct/Payfare)
- Bancos USA: Chase, Wells Fargo, Citi, Discover, Space Coast Credit Union
- Pagos: PayPal, Stripe, Global66 (transferencias a Colombia)
- Inversiones: Interactive Brokers, OANDA, Alpaca Markets
- Tiendas: Amazon, Alibaba, AliExpress, Temu, Walmart, Costco, Home Depot, Vevor, Harbor Freight
- Empleo: Indeed, smaritime y plataformas marítimas similares
- Colombia: Colsanitas, EPS Sanitas, Tigo, Telefonica, ePayco, Registraduría Nacional
- Proyecto propio: AutoBooking (app de transporte en WordPress)
CLASIFICACIÓN DE CORREOS — criterios:
IMPORTANTE: transacciones bancarias reales, pedidos/envíos con número de orden, ofertas de trabajo legítimas, correos de salud, documentos legales, personas reales conocidas, plataformas conocidas con contenido relevante para Alvaro
DUDOSO: dominio sospechoso, urgencia exagerada, pide contraseñas o datos personales, no puedes verificar el origen
BASURA: publicidad pura, newsletters, promociones sin transacción activa, spam
REGLAS ESTRICTAS:
- Responder SIEMPRE en español
- Cuando se pida JSON: responder SOLO el JSON, sin texto antes ni después
- Ser conciso — máximo 3 líneas para respuestas generales
- No inventar información que no esté en el contexto
"""
PARAMETER temperature 0.15
PARAMETER num_ctx 4096
PARAMETER num_predict 512
+12
View File
@@ -0,0 +1,12 @@
' Restart email_assistant.py: terminate the running instance, wait, relaunch.
Dim WshShell
Set WshShell = CreateObject("WScript.Shell")
' Kill only the python.exe process whose command line contains "email_assistant".
' The third argument (True) makes Run wait until wmic has finished.
' NOTE(review): wmic is deprecated on recent Windows releases - confirm it is
' still available on the machine that runs this script.
WshShell.Run "wmic process where ""name='python.exe' and commandline like '%email_assistant%'"" call terminate", 0, True
' Pause two seconds before relaunching.
WScript.Sleep 2000
' Start it again, hidden (window style 0) and without waiting for it to exit.
WshShell.Run "python ""D:\Proyectos Software\EmailManager\email_assistant.py""", 0, False
Set WshShell = Nothing
+134
View File
@@ -0,0 +1,134 @@
"""
Phase 1: analyze senders without deleting anything.
Writes report.txt listing, for each account, every sender domain sorted by message count.
"""
import os
import sys
import re
import pickle
from collections import defaultdict
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
# Force UTF-8 on stdout so the accented/Unicode text printed below does not
# depend on the console's default encoding.
sys.stdout.reconfigure(encoding='utf-8')
# Read-only Gmail scope: this script only lists messages and reads headers.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
# OAuth client-secrets file, expected in the same directory as this script.
CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json')
def authenticate(account_name):
    """Return an authorized Gmail API service for *account_name*.

    Credentials are cached in ``token_<account_name>.pickle`` next to this
    script. Cached credentials that are still valid are used as-is; expired
    ones with a refresh token are refreshed; otherwise the interactive OAuth
    flow runs. Refreshed or newly obtained credentials are written back to
    the cache file.
    """
    script_dir = os.path.dirname(__file__)
    token_path = os.path.join(script_dir, f'token_{account_name}.pickle')

    credentials = None
    if os.path.exists(token_path):
        # NOTE: unpickling runs arbitrary code from the file; only load token
        # files written by this tool.
        with open(token_path, 'rb') as token_in:
            credentials = pickle.load(token_in)

    # Fast path: the cached credentials can be used directly.
    if credentials and credentials.valid:
        return build('gmail', 'v1', credentials=credentials)

    if credentials and credentials.expired and credentials.refresh_token:
        credentials.refresh(Request())
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
        credentials = oauth_flow.run_local_server(port=0)

    # Persist whatever was just refreshed or obtained.
    with open(token_path, 'wb') as token_out:
        pickle.dump(credentials, token_out)

    return build('gmail', 'v1', credentials=credentials)
def get_header(headers, name):
    """Return the value of the first header named *name*, or '' if absent.

    *headers* is a list of ``{'name': ..., 'value': ...}`` dicts; the name
    comparison is case-insensitive.
    """
    return next(
        (entry['value'] for entry in headers
         if entry['name'].lower() == name.lower()),
        '',
    )
def extract_domain(sender):
    """Return the lower-cased domain that follows the first '@' in *sender*.

    When no '@domain' part is found, the whole sender string is returned
    lower-cased instead.
    """
    found = re.search(r'@([\w.\-]+)', sender)
    if found is None:
        return sender.lower()
    return found.group(1).lower()
def analyze_account(account_name, email):
    """Count messages per sender domain for one Gmail account.

    Scans the Promotions, Updates and Inbox queries, reading only the
    ``From`` header of each message.

    Args:
        account_name: Key used to locate the cached OAuth token.
        email: Address shown in progress messages.

    Returns:
        ``(senders, sender_names)`` where ``senders`` maps domain -> message
        count and ``sender_names`` maps domain -> the first full ``From``
        value seen for that domain.
    """
    print(f"\nConectando {email}...")
    service = authenticate(account_name)
    print("Autenticado. Leyendo correos...")
    senders = defaultdict(int)  # domain -> count
    sender_names = {}           # domain -> full sender example
    # A message can match more than one query (e.g. a promotion that is still
    # in the inbox); remember processed ids so it is fetched and counted once.
    seen_ids = set()
    queries = [
        ('Promotions', 'category:promotions'),
        ('Updates', 'category:updates'),
        ('Inbox', 'in:inbox'),
    ]
    for category, query in queries:
        print(f" Escaneando: {category}...")
        page_token = None
        count = 0
        failed = 0
        while True:
            kwargs = {'userId': 'me', 'q': query, 'maxResults': 500}
            if page_token:
                kwargs['pageToken'] = page_token
            result = service.users().messages().list(**kwargs).execute()
            messages = result.get('messages', [])
            if not messages:
                break
            for msg_ref in messages:
                msg_id = msg_ref['id']
                if msg_id in seen_ids:
                    continue
                try:
                    msg = service.users().messages().get(
                        userId='me', id=msg_id,
                        format='metadata',
                        metadataHeaders=['From']
                    ).execute()
                    headers = msg.get('payload', {}).get('headers', [])
                    sender = get_header(headers, 'From')
                    domain = extract_domain(sender)
                    senders[domain] += 1
                    sender_names.setdefault(domain, sender)
                    seen_ids.add(msg_id)
                    count += 1
                except Exception as exc:
                    # Best effort: one unreadable message must not abort the
                    # scan, but the failure is reported instead of hidden.
                    failed += 1
                    if failed == 1:
                        print(f" Error leyendo mensaje {msg_id}: {exc}")
            page_token = result.get('nextPageToken')
            if not page_token:
                break
        print(f" {count} mensajes procesados")
        if failed:
            print(f" {failed} mensajes omitidos por error")
    return senders, sender_names
def main():
    """Analyze every configured account and write the sender report.

    The full report is written to ``report.txt`` next to this script. A
    preview with the most frequent senders of each account is printed to the
    console afterwards.
    """
    accounts = [
        ('alro65', 'alro65@gmail.com'),
        ('alro65usa', 'alro65usa@gmail.com'),
    ]
    preview_size = 30  # rows per account shown in the console preview
    report_lines = []
    preview_lines = []
    for account_name, email in accounts:
        senders, sender_names = analyze_account(account_name, email)
        heading = [
            f"\n{'='*60}",
            f"CUENTA: {email}",
            f"{'='*60}",
        ]
        summary = [
            f"Total remitentes unicos: {len(senders)}",
            "\nRemitentes ordenados por cantidad de correos:\n",
        ]
        rows = [
            f" {count:5d} {domain:<40} {sender_names.get(domain, domain)[:60]}"
            for domain, count in sorted(senders.items(), key=lambda x: -x[1])
        ]
        report_lines.extend(heading + summary + rows)
        # Rows are already sorted by descending count, so the head of the
        # list is the real "top senders" for this account.
        preview_lines.extend(heading + rows[:preview_size])
    report_path = os.path.join(os.path.dirname(__file__), 'report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report_lines))
    print(f"\nReporte guardado en: {report_path}")
    print("Revisalo y dime que hacer con cada remitente.")
    # Also print the top senders of each account
    print("\n--- TOP senders (preview) ---")
    for line in preview_lines:
        print(line)


if __name__ == '__main__':
    main()
+1805
View File
File diff suppressed because it is too large Load Diff
+342
View File
@@ -0,0 +1,342 @@
"""
EmailManager — Gmail cleanup & organization
Accounts: alro65@gmail.com, alro65usa@gmail.com
"""
import os
import sys
import base64
import re
import json
import pickle
import time
import urllib.request
sys.stdout.reconfigure(encoding='utf-8')
from datetime import datetime
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# OAuth scopes requested by this script: modifying messages and managing labels.
SCOPES = [
'https://www.googleapis.com/auth/gmail.modify',
'https://www.googleapis.com/auth/gmail.labels',
]
# OAuth client-secrets file, expected in the same directory as this script.
CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json')
# ── Label structure ────────────────────────────────────────────────────────────
# Labels that process_account ensures exist; "A/B" denotes label B nested under A.
LABELS_TO_CREATE = [
"Bancos/Extractos",
"Bancos/Promo",
"Trabajo",
"AutoBooking",
"Recibos",
"Newsletters",
]
# ── Bank senders (promo goes to Bancos/Promo, statements to Bancos/Extractos) ─
# Each entry is matched as a plain substring of the lower-cased
# "sender + subject" text by is_bank_email.
# NOTE(review): short entries such as 'chase', 'bold', 'discover' or 'ing.'
# also match unrelated text (e.g. "purchase", "marketing.") - confirm that
# this looseness is intended.
BANK_DOMAINS = [
'bancolombia', 'davivienda', 'bbva', 'scotiabank', 'citibank',
'hsbc', 'santander', 'chase', 'wellsfargo', 'bankofamerica',
'capitalone', 'discover', 'americanexpress', 'amex', 'nequi',
'daviplata', 'bold', 'bancodeoccidente', 'bancopopular',
'coopcentral', 'ing.', 'paypal', 'stripe',
]
# Substrings that mark a bank email as a statement/transaction notice rather
# than a promotion (also matched against the lower-cased sender + subject).
BANK_STATEMENT_KEYWORDS = [
'estado de cuenta', 'extracto', 'resumen de cuenta', 'account statement',
'transaction alert', 'alerta de transacción', 'compra realizada',
'pago recibido', 'transferencia', 'your statement',
]
# ── Spam/promo keywords in subject or sender ──────────────────────────────────
# Used by is_spam; process_account builds its inbox search from the first
# eight entries only.
SPAM_KEYWORDS = [
'insurance quote', 'cotización de seguro', 'auto insurance',
'car insurance', 'life insurance', 'health insurance',
'get a quote', 'free quote', 'save on insurance',
'compare rates', 'lowest rates', 'best rates',
'final notice', 'last chance', 'act now', 'limited time',
'you\'ve been selected', 'congratulations you won',
'unclaimed reward', 'claim your prize',
]
# Gmail category label ids. Not referenced in the visible part of this file.
PROMO_CATEGORIES = ['CATEGORY_PROMOTIONS', 'CATEGORY_UPDATES']
def authenticate(account_name: str) -> object:
    """Return an authorized Gmail API service for *account_name*.

    Credentials are cached in ``token_<account_name>.pickle`` next to this
    script. Cached credentials are reused only when they cover every scope in
    ``SCOPES``; otherwise (or when there is no usable refresh token) the
    interactive OAuth flow runs. Refreshed or newly obtained credentials are
    written back to the cache file.

    Args:
        account_name: Key used to build the token cache file name.

    Returns:
        The Gmail v1 service object built by ``googleapiclient``.
    """
    token_file = os.path.join(os.path.dirname(__file__), f'token_{account_name}.pickle')
    creds = None
    if os.path.exists(token_file):
        # NOTE: unpickling runs arbitrary code from the file; only load token
        # files written by this tool.
        with open(token_file, 'rb') as f:
            creds = pickle.load(f)
    # A cached token granted for a narrower scope set (for example a
    # read-only one) still reports itself as valid, but every modify/trash
    # call would then be rejected. Discard it so the user re-authorizes.
    if creds and not creds.has_scopes(SCOPES):
        creds = None
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)
        with open(token_file, 'wb') as f:
            pickle.dump(creds, f)
    return build('gmail', 'v1', credentials=creds)
def get_or_create_label(service, name: str, label_cache: dict) -> str:
    """Return the Gmail label id for *name*, creating the label if needed.

    *label_cache* maps label name -> id and is updated in place. On a cache
    miss the account's labels are listed and matched case-insensitively. If
    the label does not exist, each level of a nested "A/B/C" name is created
    parent-first.
    """
    cached_id = label_cache.get(name)
    if name in label_cache:
        return cached_id

    existing = service.users().labels().list(userId='me').execute().get('labels', [])

    def find_existing(target: str):
        """Return the first listed label whose name equals *target* ignoring case."""
        for candidate in existing:
            if candidate['name'].lower() == target.lower():
                return candidate
        return None

    match = find_existing(name)
    if match is not None:
        label_cache[name] = match['id']
        return match['id']

    # Create nested labels parent-first
    segments = name.split('/')
    for depth, _ in enumerate(segments, start=1):
        path = '/'.join(segments[:depth])
        if path in label_cache:
            continue
        found = find_existing(path)
        if found:
            label_cache[path] = found['id']
            continue
        created = service.users().labels().create(
            userId='me',
            body={
                'name': path,
                'labelListVisibility': 'labelShow',
                'messageListVisibility': 'show',
            },
        ).execute()
        label_cache[path] = created['id']
        print(f" [+] Label creado: {path}")
    return label_cache[name]
def get_header(headers: list, name: str) -> str:
    """Look up a header value by case-insensitive name.

    Returns the value of the first matching ``{'name', 'value'}`` entry in
    *headers*, or an empty string when none matches.
    """
    matching_values = (
        item['value'] for item in headers
        if item['name'].lower() == name.lower()
    )
    return next(matching_values, '')
def extract_unsubscribe_url(headers: list) -> str | None:
    """Return the web unsubscribe link from the List-Unsubscribe header.

    The header may list several targets (mailto, http, https), each wrapped
    in angle brackets. An ``https://`` link is preferred wherever it appears
    in the list; an ``http://`` link is returned only when no HTTPS link
    exists. ``mailto:`` targets are ignored.

    Returns:
        The selected URL, or ``None`` when the header is missing or holds no
        http(s) link.
    """
    raw = get_header(headers, 'List-Unsubscribe')
    if not raw:
        return None
    urls = re.findall(r'<(https?://[^>]+)>', raw)
    # Prefer HTTPS: callers only follow https:// links, so returning an
    # http:// entry that merely happens to come first would discard a usable
    # HTTPS alternative.
    for url in urls:
        if url.startswith('https://'):
            return url
    return urls[0] if urls else None
def try_unsubscribe(url: str) -> bool:
    """Issue a GET request to an HTTPS unsubscribe link.

    Args:
        url: Candidate unsubscribe URL.

    Returns:
        ``True`` when the request completed without raising, ``False`` when
        the URL is not ``https://`` or the request failed for any reason.
    """
    # Guard: only follow HTTPS unsubscribe links to avoid HTTP downgrade / SSRF
    if not url.startswith('https://'):
        return False
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # Use the response as a context manager so the connection is closed
        # instead of being left open for every processed message.
        with urllib.request.urlopen(req, timeout=10):
            return True
    except Exception:
        # Best effort: a failed unsubscribe must never abort the cleanup run.
        return False
def is_bank_email(sender: str, subject: str) -> tuple[bool, bool]:
    """Classify a message as bank mail and, if so, as a statement.

    Returns ``(is_bank, is_statement)``. Both checks are plain substring
    tests on the lower-cased "sender subject" text.
    """
    # NOTE(review): substring matching means short entries can hit unrelated
    # words (e.g. 'chase' inside "purchase") - confirm this is acceptable.
    haystack = ' '.join((sender, subject)).lower()
    for bank in BANK_DOMAINS:
        if bank in haystack:
            break
    else:
        # No bank identifier found anywhere in the text.
        return False, False
    looks_like_statement = any(
        keyword in haystack for keyword in BANK_STATEMENT_KEYWORDS
    )
    return True, looks_like_statement
def is_spam(sender: str, subject: str) -> bool:
    """Return True when any SPAM_KEYWORDS entry occurs in the sender or subject.

    The comparison is a substring test on the lower-cased "sender subject" text.
    """
    text = ' '.join((sender, subject)).lower()
    for keyword in SPAM_KEYWORDS:
        if keyword in text:
            return True
    return False
def _fetch_headers(service, msg_id: str) -> list:
    """Fetch the From/Subject/List-Unsubscribe headers of one message."""
    msg = service.users().messages().get(
        userId='me', id=msg_id,
        format='metadata',
        metadataHeaders=['From', 'Subject', 'List-Unsubscribe']
    ).execute()
    return msg.get('payload', {}).get('headers', [])


def _unsubscribe_and_trash(service, msg_id: str, headers: list, stats: dict,
                           log_sender: str | None = None) -> None:
    """Try the message's unsubscribe link (if any), then move it to trash.

    Updates the 'unsubscribed' and 'deleted_spam' counters in *stats*. When
    *log_sender* is given, a successful unsubscribe is printed with it.
    """
    unsub_url = extract_unsubscribe_url(headers)
    if unsub_url and try_unsubscribe(unsub_url):
        stats['unsubscribed'] += 1
        if log_sender is not None:
            print(f" [OK] Unsubscribe: {log_sender[:60]}")
    service.users().messages().trash(userId='me', id=msg_id).execute()
    stats['deleted_spam'] += 1


def process_account(account_name: str):
    """Clean up one Gmail account and return the action counters.

    Steps:
      1. Ensure every label in LABELS_TO_CREATE exists.
      2. Walk all messages in the Promotions/Updates categories: bank mail is
         labelled (statement or promo) and removed from the inbox; everything
         else gets an unsubscribe attempt and is moved to trash.
      3. Search the inbox for subjects containing the first eight
         SPAM_KEYWORDS and unsubscribe/trash the matches.

    Args:
        account_name: Key used to locate the cached OAuth token.

    Returns:
        Dict with the counters 'unsubscribed', 'deleted_spam',
        'archived_bank_promo', 'archived_bank_statement' and 'errors'.
    """
    print(f"\n{'='*60}")
    print(f" Procesando: {account_name}")
    print(f"{'='*60}")
    service = authenticate(account_name)
    label_cache = {}
    # Ensure all labels exist
    print("\n[1] Creando estructura de labels...")
    for lbl in LABELS_TO_CREATE:
        get_or_create_label(service, lbl, label_cache)
    stats = {
        'unsubscribed': 0,
        'deleted_spam': 0,
        'archived_bank_promo': 0,
        'archived_bank_statement': 0,
        'errors': 0,
    }

    # ── Pass 1: Promotions category ────────────────────────────────────────────
    print("\n[2] Analizando correos promocionales...")
    query = 'category:promotions OR category:updates'
    page_token = None
    while True:
        try:
            kwargs = {'userId': 'me', 'q': query, 'maxResults': 500}
            if page_token:
                kwargs['pageToken'] = page_token
            result = service.users().messages().list(**kwargs).execute()
        except HttpError as e:
            print(f" Error listando mensajes: {e}")
            break
        messages = result.get('messages', [])
        if not messages:
            break
        print(f" Encontrados {len(messages)} mensajes en esta página...")
        for msg_ref in messages:
            msg_id = msg_ref['id']
            try:
                headers = _fetch_headers(service, msg_id)
                sender = get_header(headers, 'From')
                subject = get_header(headers, 'Subject')
                is_bank, is_statement = is_bank_email(sender, subject)
                if is_bank:
                    # Bank mail is kept: label it and take it out of the inbox.
                    if is_statement:
                        label_name, stat_key = 'Bancos/Extractos', 'archived_bank_statement'
                    else:
                        label_name, stat_key = 'Bancos/Promo', 'archived_bank_promo'
                    lbl_id = get_or_create_label(service, label_name, label_cache)
                    service.users().messages().modify(
                        userId='me', id=msg_id,
                        body={'addLabelIds': [lbl_id], 'removeLabelIds': ['INBOX']}
                    ).execute()
                    stats[stat_key] += 1
                else:
                    # Non-bank promo → unsubscribe + delete
                    _unsubscribe_and_trash(service, msg_id, headers, stats,
                                           log_sender=sender)
            except HttpError as e:
                stats['errors'] += 1
                if e.resp.status == 429:
                    # The failed message is not retried; the pause only
                    # protects the requests that follow.
                    print(" Rate limit — esperando 5s...")
                    time.sleep(5)
        page_token = result.get('nextPageToken')
        if not page_token:
            break

    # ── Pass 2: Explicit spam keywords in inbox ────────────────────────────────
    # NOTE(review): this pass trashes every match without the bank check used
    # in pass 1 - confirm that subjects like "final notice" from a bank should
    # really be deleted. Only the first page (up to 500 results) is handled.
    print("\n[3] Buscando spam por keywords en inbox...")
    spam_query = ' OR '.join(f'subject:"{k}"' for k in SPAM_KEYWORDS[:8])  # Gmail query limit
    try:
        result = service.users().messages().list(
            userId='me', q=f'in:inbox ({spam_query})', maxResults=500
        ).execute()
        messages = result.get('messages', [])
        print(f" Encontrados {len(messages)} mensajes spam por keyword...")
        for msg_ref in messages:
            try:
                headers = _fetch_headers(service, msg_ref['id'])
                _unsubscribe_and_trash(service, msg_ref['id'], headers, stats)
            except HttpError:
                stats['errors'] += 1
    except HttpError as e:
        print(f" Error en búsqueda spam: {e}")

    # ── Summary ────────────────────────────────────────────────────────────────
    # The separator used to be ''*40, i.e. an empty string that printed
    # nothing; draw an actual rule instead.
    separator = '─' * 40
    print(f"\n{separator}")
    print(f" RESUMEN — {account_name}")
    print(f" Unsubscribes realizados : {stats['unsubscribed']}")
    print(f" Correos eliminados : {stats['deleted_spam']}")
    print(f" Bancos/Promo : {stats['archived_bank_promo']}")
    print(f" Bancos/Extractos : {stats['archived_bank_statement']}")
    print(f" Errores : {stats['errors']}")
    print(separator)
    return stats
def main():
    """Run the cleanup for every configured account and print grand totals.

    Waits for the user to press ENTER before starting, because each account
    may open a browser window for OAuth authorization.
    """
    # token files: token_alro65.pickle, token_alro65usa.pickle
    accounts = ['alro65', 'alro65usa']
    rule = "=" * 40

    print(rule)
    print(" EmailManager -- Limpieza Gmail")
    print(rule)
    print("\nSe procesaran:")
    print(" - alro65@gmail.com")
    print(" - alro65usa@gmail.com")
    print("\nPara cada cuenta se abrirá el navegador para autenticación OAuth.")
    print("Usa ventana incógnito si es necesario.\n")
    input("Presiona ENTER para comenzar...")

    # Accounts are processed one after another, in list order.
    per_account = [process_account(acc) for acc in accounts]
    total_deleted = sum(result['deleted_spam'] for result in per_account)
    total_unsub = sum(result['unsubscribed'] for result in per_account)

    print("\n" + rule)
    print(" TOTAL GENERAL")
    print(f" Unsubscribes : {total_unsub}")
    print(f" Eliminados : {total_deleted}")
    print(rule)
    print("\nListo. Vacía la papelera en Gmail para liberar espacio.")


if __name__ == '__main__':
    main()
+20
View File
@@ -0,0 +1,20 @@
"""Send a one-off reminder message through the Telegram Bot API.

The bot token and target chat id are read from the TELEGRAM_TOKEN and
TELEGRAM_CHAT_ID environment variables.
"""
import json
import os
import urllib.parse
import urllib.request

TELEGRAM_TOKEN = os.environ.get('TELEGRAM_TOKEN', '')
TELEGRAM_CHAT_ID = os.environ.get('TELEGRAM_CHAT_ID', '')
# Fail with a clear message instead of sending a request to a token-less URL,
# which would only surface as an opaque HTTP error.
if not TELEGRAM_TOKEN or not TELEGRAM_CHAT_ID:
    raise SystemExit('Faltan TELEGRAM_TOKEN y/o TELEGRAM_CHAT_ID en el entorno.')

# Message body; the <b> tags are interpreted because parse_mode is HTML.
msg = (
    '\U0001F514 <b>RECORDATORIO</b>\n\n'
    'Esperar llamada de <b>Jaime Otero</b>\n'
    'Hora: <b>12:00 PM</b>'
)
params = {
    'chat_id': TELEGRAM_CHAT_ID,
    'text': msg,
    'parse_mode': 'HTML',
    'disable_notification': 'false',
}
# Passing form-encoded data makes urllib send the request as a POST.
data = urllib.parse.urlencode(params).encode()
url = f'https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage'
# Context manager closes the HTTP response once the call has completed.
with urllib.request.urlopen(urllib.request.Request(url, data=data), timeout=10):
    pass