135 lines
4.4 KiB
Python
135 lines
4.4 KiB
Python
"""
Phase 1: analyze senders without deleting anything.

Scans the Promotions, Updates and Inbox queries of each account and writes
report.txt with the sender domains of each account sorted by message count.
"""
|
|
import os
|
|
import sys
|
|
import re
|
|
import pickle
|
|
from collections import defaultdict
|
|
from google.auth.transport.requests import Request
|
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
|
from googleapiclient.discovery import build
|
|
|
|
# Switch stdout to UTF-8. Guarded because sys.stdout can be None or replaced
# by an object without reconfigure() (for example an io.StringIO installed by
# a harness), in which case the unconditional call would crash at import time.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# OAuth scope requested for the Gmail API (read-only access).
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
# OAuth client secrets file, looked up next to this script.
CREDENTIALS_FILE = os.path.join(os.path.dirname(__file__), 'credentials.json')
|
|
|
|
|
|
def authenticate(account_name):
    """Return a Gmail API service object for the given account.

    Cached credentials are read from ``token_<account_name>.pickle`` in this
    script's directory. When they are missing or not valid they are either
    refreshed (expired and carrying a refresh token) or obtained through the
    installed-app OAuth flow on a local server, and then written back to the
    token file.

    Args:
        account_name: Label used to build the per-account token file name.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=...)``.
    """
    script_dir = os.path.dirname(__file__)
    token_path = os.path.join(script_dir, f'token_{account_name}.pickle')

    credentials = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token_in:
            # NOTE(review): pickle.load can execute code embedded in the file,
            # so this is only safe while the token file is trusted.
            credentials = pickle.load(token_in)

    if not (credentials and credentials.valid):
        refreshable = (
            credentials and credentials.expired and credentials.refresh_token
        )
        if refreshable:
            credentials.refresh(Request())
        else:
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                CREDENTIALS_FILE, SCOPES
            )
            credentials = oauth_flow.run_local_server(port=0)
        # Persist the refreshed or newly obtained credentials for the next run.
        with open(token_path, 'wb') as token_out:
            pickle.dump(credentials, token_out)

    return build('gmail', 'v1', credentials=credentials)
|
|
|
|
|
|
def get_header(headers, name):
    """Return the value of the first header called *name*, ignoring case.

    Args:
        headers: Iterable of mappings with ``'name'`` and ``'value'`` keys.
        name: Header name to look for.

    Returns:
        The value of the first matching header, or ``''`` when none matches.
    """
    matching_values = (
        entry['value']
        for entry in headers
        if entry['name'].lower() == name.lower()
    )
    return next(matching_values, '')
|
|
|
|
|
|
def extract_domain(sender):
    """Return the lower-cased domain of the address in a ``From`` value.

    The address inside angle brackets is preferred, so an '@' that appears in
    the display name (e.g. ``"info@paypal.com" <noreply@mailer.net>``) is not
    mistaken for the sender's domain. Without an angle-bracket address the
    first '@' in the string is used.

    Args:
        sender: Raw ``From`` header value.

    Returns:
        The domain in lower case, or the whole ``sender`` lower-cased when it
        contains no '@' followed by domain characters.
    """
    # Greedy [^<>]* makes the LAST '@' inside the brackets the separator.
    bracketed = re.search(r'<[^<>]*@([\w.\-]+)\s*>', sender)
    found = bracketed or re.search(r'@([\w.\-]+)', sender)
    return found.group(1).lower() if found else sender.lower()
|
|
|
|
|
|
def _iter_message_refs(service, query):
    """Yield every message reference matching *query*, following pagination."""
    page_token = None
    while True:
        kwargs = {'userId': 'me', 'q': query, 'maxResults': 500}
        if page_token:
            kwargs['pageToken'] = page_token
        result = service.users().messages().list(**kwargs).execute()
        messages = result.get('messages', [])
        if not messages:
            return
        yield from messages
        page_token = result.get('nextPageToken')
        if not page_token:
            return


def analyze_account(account_name, email):
    """Count messages per sender domain for one Gmail account.

    Runs the Promotions, Updates and Inbox queries, fetches the ``From``
    header of every listed message and tallies it by domain. A message that
    matches more than one query (e.g. a promotion still in the inbox) is
    fetched and counted only once.

    Args:
        account_name: Label passed to ``authenticate`` to pick the token file.
        email: Address shown in the progress output.

    Returns:
        A ``(senders, sender_names)`` tuple: ``senders`` maps domain to
        message count, ``sender_names`` maps domain to the first full ``From``
        value seen for it.
    """
    print(f"\nConectando {email}...")
    service = authenticate(account_name)
    print("Autenticado. Leyendo correos...")

    senders = defaultdict(int)  # domain -> count
    sender_names = {}  # domain -> full sender example
    seen_ids = set()  # message ids already counted by an earlier query

    queries = [
        ('Promotions', 'category:promotions'),
        ('Updates', 'category:updates'),
        ('Inbox', 'in:inbox'),
    ]

    for category, query in queries:
        print(f" Escaneando: {category}...")
        count = 0
        failed = 0
        last_error = None
        for msg_ref in _iter_message_refs(service, query):
            # Best effort: one unreadable message must not abort the scan,
            # but failures are counted and reported instead of being hidden.
            try:
                msg_id = msg_ref['id']
                if msg_id in seen_ids:
                    continue
                msg = service.users().messages().get(
                    userId='me', id=msg_id,
                    format='metadata',
                    metadataHeaders=['From'],
                ).execute()
                headers = msg.get('payload', {}).get('headers', [])
                sender = get_header(headers, 'From')
                domain = extract_domain(sender)
            except Exception as exc:
                failed += 1
                last_error = exc
                continue
            seen_ids.add(msg_id)
            senders[domain] += 1
            sender_names.setdefault(domain, sender)
            count += 1
        print(f" {count} mensajes procesados")
        if failed:
            print(
                f" {failed} mensajes omitidos por error (ultimo: {last_error!r})",
                file=sys.stderr,
            )

    return senders, sender_names
|
|
|
|
|
|
def _format_account_report(email, senders, sender_names):
    """Return ``(header_lines, sender_rows)`` for one account's report section."""
    header = [
        f"\n{'='*60}",
        f"CUENTA: {email}",
        f"{'='*60}",
        f"Total remitentes unicos: {len(senders)}",
        "\nRemitentes ordenados por cantidad de correos:\n",
    ]
    rows = []
    # Highest message count first; ties keep their insertion order.
    for domain, count in sorted(senders.items(), key=lambda item: -item[1]):
        full_sender = sender_names.get(domain, domain)
        rows.append(f" {count:5d} {domain:<40} {full_sender[:60]}")
    return header, rows


def main():
    """Analyze every configured account, write report.txt and print a preview.

    The full report (all sender domains of every account, sorted by message
    count) is written to ``report.txt`` next to this script. The console
    preview shows each account's header and its most frequent senders, rather
    than the last lines of the report, which are the least frequent senders
    of the last account.
    """
    accounts = [
        ('alro65', 'alro65@gmail.com'),
        ('alro65usa', 'alro65usa@gmail.com'),
    ]
    preview_rows = 30  # sender rows shown per account in the console preview

    report_lines = []
    preview_lines = []

    for account_name, email in accounts:
        senders, sender_names = analyze_account(account_name, email)
        header, rows = _format_account_report(email, senders, sender_names)
        report_lines.extend(header)
        report_lines.extend(rows)
        preview_lines.extend(header)
        preview_lines.extend(rows[:preview_rows])

    report_path = os.path.join(os.path.dirname(__file__), 'report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report_lines))

    print(f"\nReporte guardado en: {report_path}")
    print("Revisalo y dime que hacer con cada remitente.")

    # Console preview: the top senders of each account.
    print("\n--- TOP senders (preview) ---")
    for line in preview_lines:
        print(line)
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|