Files
EmailManager/analyze_senders.py
T

135 lines
4.4 KiB
Python

"""
Phase 1: analyze senders without deleting anything.

Scans each configured Gmail account with a read-only scope and writes
report.txt listing, per account, every sender domain sorted by message count.
"""
import os
import sys
import re
import pickle
from collections import defaultdict
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
# Switch console output to UTF-8 (presumably so non-ASCII sender names print
# without encoding errors). Guarded because replacement streams such as
# io.StringIO do not provide reconfigure().
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# OAuth scope requested from Google: read-only Gmail access.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# OAuth client secrets file, expected in the same directory as this script.
CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json')
def authenticate(account_name):
    """Return an authorized Gmail API client for one account.

    Credentials are cached next to this script in
    ``token_<account_name>.pickle``. Cached credentials that are still valid
    are reused as-is. Expired credentials with a refresh token are refreshed;
    if that refresh is rejected, or there is nothing usable cached, the
    interactive ``InstalledAppFlow.run_local_server`` flow is run. Newly
    obtained or refreshed credentials are written back to the cache file.

    Args:
        account_name: Label used to select the per-account token cache file.

    Returns:
        The client object produced by ``build('gmail', 'v1', ...)``.
    """
    # Function-scope import: google-auth is already a dependency of this file
    # (google.auth.transport.requests), only this exception name is new.
    from google.auth.exceptions import RefreshError

    token_file = os.path.join(os.path.dirname(__file__), f'token_{account_name}.pickle')
    creds = None
    if os.path.exists(token_file):
        # NOTE(security): pickle.load can execute arbitrary code from the file.
        # This is only acceptable because the cache is written by this script;
        # never point it at a file from an untrusted source.
        with open(token_file, 'rb') as f:
            creds = pickle.load(f)

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError:
                # The refresh token was revoked or has expired; fall back to a
                # fresh consent flow instead of aborting the whole run.
                refreshed = False
        if not refreshed:
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist whatever we ended up with so the next run can skip the flow.
        with open(token_file, 'wb') as f:
            pickle.dump(creds, f)

    return build('gmail', 'v1', credentials=creds)
def get_header(headers, name):
    """Return the value of the first header whose name matches *name*.

    The comparison is case-insensitive.

    Args:
        headers: Iterable of dicts with ``'name'`` and ``'value'`` keys.
            ``None`` or an empty iterable is treated as "no headers".
        name: Header name to look for, e.g. ``'From'``.

    Returns:
        The matching header's value, or ``''`` when no header matches.
    """
    # Lowercase the target once instead of on every loop iteration.
    wanted = name.lower()
    for header in headers or ():
        if header['name'].lower() == wanted:
            return header['value']
    return ''
def extract_domain(sender):
    """Return the lowercased domain of the address in a ``From`` header value.

    The header is first parsed with ``email.utils.parseaddr`` so that an ``@``
    inside the display name (``"john@work" <john@example.com>``) is not
    mistaken for the address. If the parsed address yields no domain, the raw
    header text is searched instead.

    Args:
        sender: Raw ``From`` header value.

    Returns:
        The domain in lowercase, or the whole lowercased *sender* when no
        ``@domain`` part can be found.
    """
    from email.utils import parseaddr

    pattern = r'@([\w.\-]+)'
    _, address = parseaddr(sender)
    # parseaddr returns an empty address for input it rejects, so fall back
    # to scanning the raw header in that case.
    match = re.search(pattern, address) or re.search(pattern, sender)
    return match.group(1).lower() if match else sender.lower()
def _iter_message_ids(service, query):
    """Yield the id of every message matching *query*, following pagination."""
    page_token = None
    while True:
        kwargs = {'userId': 'me', 'q': query, 'maxResults': 500}
        if page_token:
            kwargs['pageToken'] = page_token
        result = service.users().messages().list(**kwargs).execute()
        messages = result.get('messages', [])
        if not messages:
            return
        for msg_ref in messages:
            yield msg_ref['id']
        page_token = result.get('nextPageToken')
        if not page_token:
            return


def analyze_account(account_name, email):
    """Count messages per sender domain for one Gmail account.

    Runs three searches (promotions, updates, inbox), fetches the ``From``
    header of each matching message and tallies it by domain. The searches
    can overlap, so each message id is processed only once.

    Args:
        account_name: Label passed to ``authenticate`` to pick the token file.
        email: Address of the account; used only in progress output.

    Returns:
        A ``(senders, sender_names)`` tuple. ``senders`` maps domain to the
        number of messages counted; ``sender_names`` maps domain to the first
        full ``From`` value seen for it.
    """
    print(f"\nConectando {email}...")
    service = authenticate(account_name)
    print("Autenticado. Leyendo correos...")

    senders = defaultdict(int)   # domain -> count
    sender_names = {}            # domain -> full sender example
    seen_ids = set()             # message ids already handled by an earlier query

    queries = [
        ('Promotions', 'category:promotions'),
        ('Updates', 'category:updates'),
        ('Inbox', 'in:inbox'),
    ]

    for category, query in queries:
        print(f" Escaneando: {category}...")
        count = 0
        failed = 0
        last_error = None
        for msg_id in _iter_message_ids(service, query):
            # A message in Promotions/Updates may also match in:inbox; skip it
            # so it is neither fetched nor counted twice.
            if msg_id in seen_ids:
                continue
            seen_ids.add(msg_id)
            try:
                msg = service.users().messages().get(
                    userId='me', id=msg_id,
                    format='metadata',
                    metadataHeaders=['From']
                ).execute()
                headers = msg.get('payload', {}).get('headers', [])
                sender = get_header(headers, 'From')
                domain = extract_domain(sender)
                senders[domain] += 1
                sender_names.setdefault(domain, sender)
                count += 1
            except Exception as exc:
                # Best effort: one bad message must not abort the scan, but
                # the failure is recorded rather than silently dropped.
                failed += 1
                last_error = exc
        print(f" {count} mensajes procesados")
        if failed:
            print(f" {failed} mensajes omitidos por error (ultimo: {last_error!r})")

    return senders, sender_names
def main():
    """Analyze every configured account and write the combined report.

    For each account the sender domains are listed by descending message
    count. The full listing is written to ``report.txt`` next to this script,
    and the top entries of each account are echoed to the console.
    """
    accounts = [
        ('alro65', 'alro65@gmail.com'),
        ('alro65usa', 'alro65usa@gmail.com'),
    ]
    preview_size = 30  # rows per account shown in the console preview

    report_lines = []
    preview_lines = []
    for account_name, email in accounts:
        senders, sender_names = analyze_account(account_name, email)
        report_lines.append(f"\n{'='*60}")
        report_lines.append(f"CUENTA: {email}")
        report_lines.append(f"{'='*60}")
        report_lines.append(f"Total remitentes unicos: {len(senders)}")
        report_lines.append("\nRemitentes ordenados por cantidad de correos:\n")

        ranked = sorted(senders.items(), key=lambda item: -item[1])
        rows = [
            f" {count:5d} {domain:<40} {sender_names.get(domain, domain)[:60]}"
            for domain, count in ranked
        ]
        report_lines.extend(rows)

        # Keep the head of each account's ranking for the preview; slicing
        # the tail of report_lines would show the least frequent senders.
        preview_lines.append(f"CUENTA: {email}")
        preview_lines.extend(rows[:preview_size])

    report_path = os.path.join(os.path.dirname(__file__), 'report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report_lines))

    print(f"\nReporte guardado en: {report_path}")
    print("Revisalo y dime que hacer con cada remitente.")

    # Also print the top senders of each account.
    print("\n--- TOP senders (preview) ---")
    for line in preview_lines:
        print(line)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()