Initial commit: Discord automation tools
This commit is contained in:
242
scripts/user_history_scraper.py
Executable file
242
scripts/user_history_scraper.py
Executable file
@@ -0,0 +1,242 @@
|
||||
# discord_tools/scripts/user_history_scraper.py
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
# Make the directory two levels above this script's folder importable, so the
# absolute ``discord_tools.*`` imports below resolve when the file is run
# directly as a script (presumably that directory holds the ``discord_tools``
# package -- verify against the repository layout).
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(script_dir, os.pardir, os.pardir))
sys.path.insert(0, project_root)
|
||||
|
||||
from discord_tools.config.settings import DATA_DIR
|
||||
from discord_tools.utils.api_utils import make_discord_request
|
||||
|
||||
def get_mutual_guilds():
    """Fetch the guilds (servers) the authenticated account belongs to.

    Returns:
        The decoded JSON body of ``GET /users/@me/guilds``. When the request
        helper returns a falsy response, a notice is printed and an empty
        list is returned instead.
    """
    guilds_response = make_discord_request('GET', '/users/@me/guilds')
    if guilds_response:
        return guilds_response.json()

    print("Failed to fetch guilds")
    return []
|
||||
|
||||
def get_guild_channels(guild_id):
    """Fetch the channel list of a single guild.

    Args:
        guild_id: ID of the guild whose channels are requested.

    Returns:
        The decoded JSON body of ``GET /guilds/{guild_id}/channels``, or an
        empty list when the request helper returns a falsy response.
    """
    channels_response = make_discord_request('GET', f'/guilds/{guild_id}/channels')
    return channels_response.json() if channels_response else []
|
||||
|
||||
def search_user_messages_in_guild(guild_id, user_id):
    """Collect the messages authored by ``user_id`` that guild search returns.

    Pages through ``/guilds/{guild_id}/messages/search`` filtered by
    ``author_id`` until a request fails, a page comes back empty, or the
    offset reaches the ``total_results`` reported by the API.

    Args:
        guild_id: ID of the guild to search.
        user_id: ID of the author. Coerced to ``str`` because it is compared
            against the string ``author.id`` of every returned message.

    Returns:
        A list of message dicts exactly as returned by the API, in the order
        they were first seen. A message that shows up in more than one result
        group is included only once.
    """
    # Offset step per request; assumes the search endpoint serves up to 25
    # hits per page -- confirm against the API documentation.
    page_size = 25
    author_id = str(user_id)

    all_messages = []
    seen_ids = set()
    offset = 0

    while True:
        endpoint = f'/guilds/{guild_id}/messages/search?author_id={author_id}&offset={offset}'
        response = make_discord_request('GET', endpoint)
        if not response:
            break

        data = response.json()
        messages = data.get('messages', [])
        if not messages:
            break

        # Each entry is either a list of related messages or a single message
        # dict; normalise to a list so both shapes share one code path.
        for message_group in messages:
            group = message_group if isinstance(message_group, list) else [message_group]
            for msg in group:
                if msg['author']['id'] != author_id:
                    continue
                # The same message can recur across groups; keep the first copy.
                message_id = msg.get('id')
                if message_id is not None:
                    if message_id in seen_ids:
                        continue
                    seen_ids.add(message_id)
                all_messages.append(msg)

        offset += page_size
        total_results = data.get('total_results', 0)

        print(f" Found {len(all_messages)} messages so far...")

        # Stop once the offset has moved past everything the API reported.
        if offset >= total_results:
            break

    return all_messages
|
||||
|
||||
def get_user_info_from_guild(guild_id, user_id):
    """Look up a user's profile through the guild-member endpoint.

    Args:
        guild_id: ID of the guild to query.
        user_id: ID of the member to look up.

    Returns:
        The ``user`` entry of the member payload (``None`` if that key is
        absent), or ``None`` when the request helper returns a falsy response.
    """
    member_response = make_discord_request('GET', f'/guilds/{guild_id}/members/{user_id}')
    if not member_response:
        return None

    return member_response.json().get('user')
|
||||
|
||||
def format_message_for_export(msg):
    """Reduce a raw message dict to the fields used in the text export.

    Args:
        msg: Message dict with at least ``id``, ``timestamp`` (ISO 8601; a
            trailing ``Z`` is accepted) and ``content``. ``attachments``,
            ``embeds``, ``mentions`` and ``edited_timestamp`` are optional.

    Returns:
        A dict with keys ``id``, ``timestamp`` (``YYYY-MM-DD HH:MM:SS`` in
        the offset carried by the input, no timezone conversion),
        ``content``, ``attachments`` (list of URLs), ``embeds`` (count),
        ``mentions`` (``username#discriminator`` strings) and ``edited``
        (bool).

    Raises:
        KeyError: If ``id``, ``timestamp`` or ``content`` is missing.
        ValueError: If ``timestamp`` is not a parseable ISO 8601 string.
    """
    # fromisoformat() on older Python versions rejects a bare "Z" suffix.
    sent_at = datetime.fromisoformat(msg['timestamp'].replace('Z', '+00:00'))

    return {
        'id': msg['id'],
        'timestamp': sent_at.strftime('%Y-%m-%d %H:%M:%S'),
        'content': msg['content'],
        'attachments': [att['url'] for att in msg.get('attachments', [])],
        'embeds': len(msg.get('embeds', [])),
        # A mentioned user may carry no discriminator; fall back to '0',
        # matching how usernames are rendered elsewhere in this script.
        'mentions': [
            f"{u['username']}#{u.get('discriminator', '0')}"
            for u in msg.get('mentions', [])
        ],
        'edited': msg.get('edited_timestamp') is not None,
    }
|
||||
|
||||
def _display_name(user):
    """Render a user dict as ``username#discriminator`` ('0' if absent)."""
    return f"{user['username']}#{user.get('discriminator', '0')}"


def _group_by_channel(messages):
    """Group message dicts into ``{channel_id: [message, ...]}``, keeping order."""
    channels = {}
    for msg in messages:
        channels.setdefault(msg['channel_id'], []).append(msg)
    return channels


def _unique_guild_key(guild_name, guild_id, existing):
    """Return the guild name, with the id appended if the name is already a key."""
    if guild_name not in existing:
        return guild_name
    return f"{guild_name} ({guild_id})"


def _write_formatted_report(path, username, total_messages, all_scraped_messages):
    """Write the human-readable, per-server / per-channel message listing."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f"Message History for {username}\n")
        f.write(f"Scraped on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total Messages: {total_messages}\n")
        f.write("=" * 80 + "\n\n")

        for guild_name, channels in all_scraped_messages.items():
            f.write(f"\n{'=' * 80}\n")
            f.write(f"SERVER: {guild_name}\n")
            f.write(f"{'=' * 80}\n\n")

            for channel_id, messages in channels.items():
                # Only the channel id is available on a message, so it
                # doubles as the channel label.
                f.write(f"\n--- Channel {channel_id} ({len(messages)} messages) ---\n\n")

                # Timestamps are compared as strings, relying on them all
                # sharing one ISO 8601 layout.
                for msg in sorted(messages, key=lambda m: m['timestamp']):
                    formatted = format_message_for_export(msg)
                    f.write(f"[{formatted['timestamp']}]\n")
                    f.write(f"{formatted['content']}\n")

                    if formatted['attachments']:
                        f.write(f"Attachments: {', '.join(formatted['attachments'])}\n")

                    if formatted['edited']:
                        f.write("(edited)\n")

                    f.write("\n")


def _write_stats_report(path, username, total_messages, all_scraped_messages):
    """Write the per-server and per-channel message counts."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f"Message Statistics for {username}\n")
        f.write("=" * 50 + "\n\n")
        f.write(f"Total Messages: {total_messages}\n")
        f.write(f"Servers: {len(all_scraped_messages)}\n\n")

        f.write("Messages per server:\n")
        for guild_name, channels in all_scraped_messages.items():
            guild_total = sum(len(msgs) for msgs in channels.values())
            f.write(f" {guild_name}: {guild_total}\n")

        f.write("\nMessages per channel:\n")
        for guild_name, channels in all_scraped_messages.items():
            for channel_id, messages in channels.items():
                f.write(f" {guild_name} > {channel_id}: {len(messages)}\n")


def main():
    """Interactive entry point: prompt for a user ID, search, and export.

    Prompts for a user ID, searches every guild returned by
    ``get_mutual_guilds()`` for that user's messages, and writes three files
    to ``DATA_DIR/user_history_<user_id>/``: ``raw_messages.json``,
    ``formatted_messages.txt`` and ``stats.txt``. Returns early, writing
    nothing, when the ID is invalid or no messages are found.
    """
    print("User History Scraper")
    print("=" * 50)

    user_id = input("Enter the user ID to scrape: ").strip()

    # isdigit() alone accepts non-ASCII digits, which would then be placed
    # into the request URL; require plain ASCII digits.
    if not (user_id.isascii() and user_id.isdigit()):
        print("Invalid user ID")
        return

    print(f"\nScraping messages from user ID: {user_id}")
    print("=" * 50)

    guilds = get_mutual_guilds()
    print(f"Found {len(guilds)} servers")

    all_scraped_messages = {}
    total_messages = 0
    username = "Unknown"

    for guild in guilds:
        guild_id = guild['id']
        guild_name = guild['name']

        print(f"\nSearching {guild_name}...")

        # Resolve the display name once, from the first guild that knows the user.
        if username == "Unknown":
            user_info = get_user_info_from_guild(guild_id, user_id)
            if user_info:
                username = _display_name(user_info)
                print(f" Found user: {username}")

        messages = search_user_messages_in_guild(guild_id, user_id)
        if not messages:
            continue

        channels_dict = _group_by_channel(messages)

        # Guild names are not unique; disambiguate so a second guild with the
        # same name does not overwrite the first one's results.
        result_key = _unique_guild_key(guild_name, guild_id, all_scraped_messages)
        all_scraped_messages[result_key] = channels_dict
        total_messages += len(messages)
        print(f" Total in {guild_name}: {len(messages)} messages across {len(channels_dict)} channels")

    print("\n" + "=" * 50)
    print(f"Scraping complete. Total messages found: {total_messages}")

    if total_messages == 0:
        print("No messages found for this user.")
        return

    # Fall back to the author block of the first collected message when no
    # guild-member lookup produced a name.
    if username == "Unknown":
        first_msg = next(
            (
                msgs[0]
                for channels in all_scraped_messages.values()
                for msgs in channels.values()
                if msgs
            ),
            None,
        )
        if first_msg is not None:
            username = _display_name(first_msg['author'])

    print(f"User identified as: {username}")

    output_folder = os.path.join(DATA_DIR, f"user_history_{user_id}")
    os.makedirs(output_folder, exist_ok=True)

    raw_file = os.path.join(output_folder, "raw_messages.json")
    with open(raw_file, 'w', encoding='utf-8') as f:
        json.dump(all_scraped_messages, f, indent=2, ensure_ascii=False)

    _write_formatted_report(
        os.path.join(output_folder, "formatted_messages.txt"),
        username,
        total_messages,
        all_scraped_messages,
    )
    _write_stats_report(
        os.path.join(output_folder, "stats.txt"),
        username,
        total_messages,
        all_scraped_messages,
    )

    print(f"\nResults saved to: {output_folder}")
    print(" - raw_messages.json (complete data)")
    print(" - formatted_messages.txt (readable format)")
    print(" - stats.txt (summary)")
|
||||
|
||||
# Run the interactive scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user