""" Fase 1: Analiza remitentes sin borrar nada. Genera report.txt con todos los senders agrupados por categoria. """ import os import sys import re import pickle from collections import defaultdict from google.auth.transport.requests import Request from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build sys.stdout.reconfigure(encoding='utf-8') SCOPES = ['https://www.googleapis.com/auth/gmail.readonly'] CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json') def authenticate(account_name): token_file = os.path.join(os.path.dirname(__file__), f'token_{account_name}.pickle') creds = None if os.path.exists(token_file): with open(token_file, 'rb') as f: creds = pickle.load(f) if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES) creds = flow.run_local_server(port=0) with open(token_file, 'wb') as f: pickle.dump(creds, f) return build('gmail', 'v1', credentials=creds) def get_header(headers, name): for h in headers: if h['name'].lower() == name.lower(): return h['value'] return '' def extract_domain(sender): match = re.search(r'@([\w.\-]+)', sender) return match.group(1).lower() if match else sender.lower() def analyze_account(account_name, email): print(f"\nConectando {email}...") service = authenticate(account_name) print(f"Autenticado. Leyendo correos...") senders = defaultdict(int) # domain -> count sender_names = {} # domain -> full sender example queries = [ ('Promotions', 'category:promotions'), ('Updates', 'category:updates'), ('Inbox', 'in:inbox'), ] for category, query in queries: print(f" Escaneando: {category}...") page_token = None count = 0 while True: kwargs = {'userId': 'me', 'q': query, 'maxResults': 500} if page_token: kwargs['pageToken'] = page_token result = service.users().messages().list(**kwargs).execute() messages = result.get('messages', []) if not messages: break for msg_ref in messages: try: msg = service.users().messages().get( userId='me', id=msg_ref['id'], format='metadata', metadataHeaders=['From'] ).execute() headers = msg.get('payload', {}).get('headers', []) sender = get_header(headers, 'From') domain = extract_domain(sender) senders[domain] += 1 if domain not in sender_names: sender_names[domain] = sender count += 1 except Exception: pass page_token = result.get('nextPageToken') if not page_token: break print(f" {count} mensajes procesados") return senders, sender_names def main(): accounts = [ ('alro65', 'alro65@gmail.com'), ('alro65usa', 'alro65usa@gmail.com'), ] report_lines = [] for account_name, email in accounts: senders, sender_names = analyze_account(account_name, email) report_lines.append(f"\n{'='*60}") report_lines.append(f"CUENTA: {email}") report_lines.append(f"{'='*60}") report_lines.append(f"Total remitentes unicos: {len(senders)}") report_lines.append(f"\nRemitentes ordenados por cantidad de correos:\n") for domain, count in sorted(senders.items(), key=lambda x: -x[1]): full_sender = sender_names.get(domain, domain) report_lines.append(f" {count:5d} {domain:<40} {full_sender[:60]}") report_path = os.path.join(os.path.dirname(__file__), 'report.txt') with open(report_path, 'w', encoding='utf-8') as f: f.write('\n'.join(report_lines)) print(f"\nReporte guardado en: {report_path}") print("Revisalo y dime que hacer con cada remitente.") # Also print top 30 print("\n--- TOP senders (preview) ---") for line in report_lines[-50:]: print(line) if __name__ == '__main__': main()