feat: EmailManager initial commit — Python Google APIs (Gmail, Calendar), Telegram Bot API, Ollama (local LLM), Craigslist RSS, OpenStreetMap/Overpass
This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
Fase 1: Analiza remitentes sin borrar nada.
|
||||
Genera report.txt con todos los senders agrupados por categoria.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from google.auth.transport.requests import Request
|
||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||
from googleapiclient.discovery import build
|
||||
|
||||
# Force UTF-8 on stdout. The call is guarded because a replaced stdout
# (for example an io.StringIO redirect) has no reconfigure() method and the
# unguarded call would raise AttributeError at import time.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# OAuth scope requested from Google: read-only access to Gmail.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# OAuth client secrets file, looked up in the same directory as this script.
CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json')
|
||||
|
||||
|
||||
def authenticate(account_name):
    """Return a Gmail API client authorized for ``account_name``.

    Credentials are cached in ``token_<account_name>.pickle`` next to this
    script. A cached, still-valid token is used directly. An expired token
    that carries a refresh token is refreshed; otherwise the installed-app
    OAuth flow is started via ``run_local_server``. Refreshed or newly
    obtained credentials are written back to the cache file.
    """
    script_dir = os.path.dirname(__file__)
    token_file = os.path.join(script_dir, f'token_{account_name}.pickle')

    creds = None
    if os.path.exists(token_file):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe as long as the token file is written solely by
        # this script and is not shared.
        with open(token_file, 'rb') as cached:
            creds = pickle.load(cached)

    # Fast path: a usable cached token needs no refresh and no rewrite.
    if creds and creds.valid:
        return build('gmail', 'v1', credentials=creds)

    refreshable = creds and creds.expired and creds.refresh_token
    if refreshable:
        creds.refresh(Request())
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file(
            CREDENTIALS_FILE, SCOPES
        )
        creds = oauth_flow.run_local_server(port=0)

    # Persist whatever credentials we ended up with for the next run.
    with open(token_file, 'wb') as cached:
        pickle.dump(creds, cached)

    return build('gmail', 'v1', credentials=creds)
|
||||
|
||||
|
||||
def get_header(headers, name):
    """Return the value of the first header named ``name`` (case-insensitive).

    ``headers`` is an iterable of mappings with ``'name'`` and ``'value'``
    keys. An empty string is returned when no header matches.
    """
    matching_values = (
        entry['value']
        for entry in headers
        if entry['name'].lower() == name.lower()
    )
    return next(matching_values, '')
|
||||
|
||||
|
||||
def extract_domain(sender):
    """Return the lower-cased domain of the address in a ``From`` header value.

    The header is first parsed with ``email.utils.parseaddr`` so that an
    ``@`` inside the display name (for example
    ``"support@acme" <noreply@mailer.com>``) is not mistaken for the address.
    If the parsed address has no recognizable domain, the raw header is
    searched instead. When no domain is found at all, the whole lower-cased
    ``sender`` string is returned so the message is still grouped somewhere.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not need to change.
    from email.utils import parseaddr

    domain_pattern = r'@([\w.\-]+)'
    address = parseaddr(sender)[1]
    # Prefer the parsed address; fall back to scanning the raw header text.
    match = re.search(domain_pattern, address) or re.search(domain_pattern, sender)
    return match.group(1).lower() if match else sender.lower()
|
||||
|
||||
|
||||
def _iter_message_ids(service, query):
    """Yield the id of every message matching ``query``, page by page."""
    page_token = None
    while True:
        kwargs = {'userId': 'me', 'q': query, 'maxResults': 500}
        if page_token:
            kwargs['pageToken'] = page_token
        result = service.users().messages().list(**kwargs).execute()
        messages = result.get('messages', [])
        if not messages:
            return
        for msg_ref in messages:
            yield msg_ref['id']
        page_token = result.get('nextPageToken')
        if not page_token:
            return


def analyze_account(account_name, email):
    """Count messages per sender domain for one Gmail account.

    Authenticates as ``account_name``, scans the Promotions, Updates and
    Inbox queries, and reads the ``From`` header of each message.

    Each message is fetched and counted once, even when it matches several
    queries. Messages that cannot be fetched or parsed are skipped
    (best-effort), but the number skipped and the last error are printed
    rather than silently discarded.

    Args:
        account_name: Key used to locate the cached token for the account.
        email: Address shown in progress output only.

    Returns:
        A ``(senders, sender_names)`` tuple: ``senders`` maps domain to
        message count, ``sender_names`` maps domain to the first full
        ``From`` value seen for that domain.
    """
    print(f"\nConectando {email}...")
    service = authenticate(account_name)
    print("Autenticado. Leyendo correos...")

    senders = defaultdict(int)  # domain -> count
    sender_names = {}  # domain -> full sender example
    seen_ids = set()  # message ids already handled in an earlier query

    queries = [
        ('Promotions', 'category:promotions'),
        ('Updates', 'category:updates'),
        ('Inbox', 'in:inbox'),
    ]

    for category, query in queries:
        print(f" Escaneando: {category}...")
        count = 0
        failed = 0
        last_error = None

        for msg_id in _iter_message_ids(service, query):
            # The queries overlap, so skip anything already processed to
            # avoid double counting and a redundant API call.
            if msg_id in seen_ids:
                continue
            seen_ids.add(msg_id)

            try:
                msg = service.users().messages().get(
                    userId='me',
                    id=msg_id,
                    format='metadata',
                    metadataHeaders=['From'],
                ).execute()
                headers = msg.get('payload', {}).get('headers', [])
                sender = get_header(headers, 'From')
                domain = extract_domain(sender)
            except Exception as exc:
                # Best-effort scan: one bad message must not abort the run,
                # but the failure is recorded so it can be reported.
                failed += 1
                last_error = exc
                continue

            senders[domain] += 1
            sender_names.setdefault(domain, sender)
            count += 1

        print(f" {count} mensajes procesados")
        if failed:
            print(f" {failed} mensajes omitidos por error (ultimo: {last_error})")

    return senders, sender_names
|
||||
|
||||
|
||||
def main():
    """Analyze every configured account and write ``report.txt``.

    For each account the report holds a header followed by one row per
    sender domain, sorted by descending message count. The report is written
    next to this script. A console preview then shows, per account, the
    header and the most frequent senders.

    The preview previously printed the last 50 lines of the whole report,
    which is the tail of the last account (its least frequent senders), not
    the top senders its title announces.
    """
    accounts = [
        ('alro65', 'alro65@gmail.com'),
        ('alro65usa', 'alro65usa@gmail.com'),
    ]
    preview_limit = 30  # sender rows shown per account in the preview

    report_lines = []
    preview_lines = []

    for account_name, email in accounts:
        senders, sender_names = analyze_account(account_name, email)

        header = [
            f"\n{'='*60}",
            f"CUENTA: {email}",
            f"{'='*60}",
            f"Total remitentes unicos: {len(senders)}",
            "\nRemitentes ordenados por cantidad de correos:\n",
        ]

        rows = []
        for domain, count in sorted(senders.items(), key=lambda item: -item[1]):
            full_sender = sender_names.get(domain, domain)
            rows.append(f" {count:5d} {domain:<40} {full_sender[:60]}")

        report_lines.extend(header)
        report_lines.extend(rows)

        # Rows are already sorted by count, so the first ones are the top senders.
        preview_lines.extend(header)
        preview_lines.extend(rows[:preview_limit])

    report_path = os.path.join(os.path.dirname(__file__), 'report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report_lines))

    print(f"\nReporte guardado en: {report_path}")
    print("Revisalo y dime que hacer con cada remitente.")

    # Also print the top senders of each account
    print("\n--- TOP senders (preview) ---")
    for line in preview_lines:
        print(line)
|
||||
|
||||
|
||||
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user