# discord_tools/scripts/pfp_downloader.py
"""Download the avatar of every member of a Discord guild.

The member list is collected over the Discord Gateway (websocket) and each
member's avatar is then fetched from the CDN into
``DATA_DIR/avatars_<guild_id>``.
"""

import asyncio
import json
import os
import sys
from urllib.parse import urlparse

import aiohttp
import websockets

# Make the directory that contains the ``discord_tools`` package importable
# (two levels above this script's own directory).
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(script_dir))
sys.path.insert(0, project_root)

from discord_tools.config.settings import DISCORD_TOKEN, DEFAULT_AVATAR_SIZE, DATA_DIR
from discord_tools.utils.api_utils import make_discord_request

# Member count assumed when the guild info request does not provide one.
_DEFAULT_MEMBER_COUNT = 200
# Number of member-list rows requested per range.
_MEMBER_CHUNK_SIZE = 100
# Seconds to keep listening after the last member was received.
_MEMBER_IDLE_TIMEOUT = 20
# Print a progress line each time this many additional members were collected.
_PROGRESS_STEP = 50
# Characters that are path separators or otherwise invalid in file names on
# common filesystems.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'


def _safe_filename_part(name):
    """Return *name* with filesystem-unsafe characters replaced by ``_``."""
    return "".join(
        "_" if ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(name)
    )


def _build_member_ranges(member_count, chunk_size=_MEMBER_CHUNK_SIZE):
    """Return inclusive ``[start, end]`` index ranges covering 0..member_count.

    Every range satisfies ``start <= end``; the final range is clipped to
    ``member_count``.
    """
    return [
        [start, min(start + chunk_size - 1, member_count)]
        for start in range(0, member_count + 1, chunk_size)
    ]


def _collect_members(update, members):
    """Store the users found in one GUILD_MEMBER_LIST_UPDATE payload.

    Args:
        update: The ``d`` payload of the gateway event.
        members: Dict mapping user id -> user object; updated in place.

    Returns:
        True if at least one member entry carrying a user was seen.
    """
    saw_member = False
    for op in update.get('ops', []):
        kind = op.get('op')
        if kind == 'SYNC':
            # SYNC ops contain the bulk of members
            items = op.get('items', [])
        elif kind == 'INSERT':
            # INSERT ops are for individual member additions
            items = [op.get('item', {})]
        else:
            continue

        for item in items:
            if 'member' not in item:
                continue
            user = item['member'].get('user')
            if user:
                members[user['id']] = user
                saw_member = True
    return saw_member


def _first_channel_id(guild_id):
    """Return the id of the guild's first listed channel, or None."""
    channels_response = make_discord_request('GET', f'/guilds/{guild_id}/channels')
    if not channels_response:
        return None
    channels = channels_response.json()
    if not channels:
        return None
    return channels[0]['id']


async def download_avatar(session, user, folder):
    """Download one user's avatar as a PNG into *folder*.

    Args:
        session: An ``aiohttp.ClientSession`` used for the CDN request.
        user: User object (dict) with ``id``, ``username`` and ``avatar`` keys.
        folder: Existing directory the image is written to.

    Network and file errors are reported on stdout instead of being raised, so
    one failing avatar does not abort a whole batch.
    """
    username = user['username']
    if not user.get('avatar'):
        print(f"No avatar found for {username}")
        return

    avatar_url = (
        f"https://cdn.discordapp.com/avatars/{user['id']}/{user['avatar']}.png"
        f"?size={DEFAULT_AVATAR_SIZE}"
    )
    # The username is sanitised so it cannot contain path separators.
    filename = f"{_safe_filename_part(username)}_{user['id']}.png"
    filepath = os.path.join(folder, filename)

    try:
        async with session.get(avatar_url) as resp:
            if resp.status != 200:
                print(f"Failed to download avatar for {username}")
                return
            payload = await resp.read()
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        print(f"Failed to download avatar for {username}: {exc}")
        return

    try:
        with open(filepath, 'wb') as f:
            f.write(payload)
    except OSError as exc:
        print(f"Failed to save avatar for {username}: {exc}")
        return

    print(f"Downloaded avatar for {username}")


def get_guild_info(guild_id):
    """Fetch the guild object (with member counts) for *guild_id*.

    Returns:
        The decoded JSON body, or None when the request yields a falsy
        response.
    """
    endpoint = f'/guilds/{guild_id}?with_counts=true'
    response = make_discord_request('GET', endpoint)
    if not response:
        print(f"Failed to fetch guild info for {guild_id}")
        return None
    return response.json()


async def get_guild_members_via_gateway(guild_id):
    """Collect the guild's members from gateway member-list updates.

    Connects to the gateway, identifies with ``DISCORD_TOKEN``, subscribes to
    the member list through the guild's first channel and gathers users from
    GUILD_MEMBER_LIST_UPDATE events until no member has arrived for
    ``_MEMBER_IDLE_TIMEOUT`` seconds.

    Args:
        guild_id: The guild id as a string.

    Returns:
        Dict mapping user id -> user object.
    """
    members = {}
    gateway_url = "wss://gateway.discord.gg/?v=9&encoding=json"

    # Figure out how many members exist
    guild_info = get_guild_info(guild_id)
    if not guild_info:
        print("Could not get guild info, using default member count estimate")
        approximate_member_count = _DEFAULT_MEMBER_COUNT
    else:
        # ``or`` also covers an explicit null count in the response.
        approximate_member_count = (
            guild_info.get('approximate_member_count') or _DEFAULT_MEMBER_COUNT
        )
        print(f"Guild has approximately {approximate_member_count} members")

    # Need a channel ID for the subscription to work. Any channel will do.
    # make_discord_request is synchronous, so it is called before the gateway
    # connection is opened rather than blocking the event loop (and with it
    # the heartbeat) while connected.
    first_channel_id = _first_channel_id(guild_id)

    async with websockets.connect(gateway_url, max_size=16 * 1024 * 1024) as ws:
        # The first frame (hello) carries the heartbeat interval in ms.
        hello = json.loads(await ws.recv())
        heartbeat_interval = hello['d']['heartbeat_interval']

        # Identify ourselves
        identify = {
            "op": 2,
            "d": {
                "token": DISCORD_TOKEN,
                "properties": {
                    "$os": "windows",
                    "$browser": "chrome",
                    "$device": "pc",
                },
                "compress": False,
            },
        }
        await ws.send(json.dumps(identify))

        # Keep the connection alive while members are collected
        async def heartbeat():
            while True:
                await asyncio.sleep(heartbeat_interval / 1000)
                await ws.send(json.dumps({"op": 1, "d": None}))

        heartbeat_task = asyncio.create_task(heartbeat())
        try:
            # Wait for ready event
            while True:
                msg = json.loads(await ws.recv())
                if msg.get('t') == 'READY':
                    print("Connected to Discord Gateway")
                    break

            # Build ranges for member scraping (100 rows per range)
            ranges = _build_member_ranges(approximate_member_count)
            print(f"Requesting member list in {len(ranges)} chunks...")

            subscribe = {
                "op": 14,
                "d": {
                    "guild_id": guild_id,
                    "typing": False,
                    "activities": False,
                    "threads": False,
                    "members": [],
                    "channels": {},
                },
            }
            if first_channel_id is not None:
                subscribe['d']['channels'][first_channel_id] = ranges
                print(f"Using channel {first_channel_id} for member scraping")

            await ws.send(json.dumps(subscribe))
            print("Sent subscription request")

            # Collect members from the member list updates
            loop = asyncio.get_running_loop()
            last_member_time = loop.time()
            reported_steps = 0
            try:
                while True:
                    remaining_timeout = _MEMBER_IDLE_TIMEOUT - (
                        loop.time() - last_member_time
                    )
                    if remaining_timeout <= 0:
                        print("Timeout - no new members received")
                        break

                    raw = await asyncio.wait_for(ws.recv(), timeout=remaining_timeout)
                    data = json.loads(raw)
                    if data.get('t') != 'GUILD_MEMBER_LIST_UPDATE':
                        continue

                    if _collect_members(data['d'], members):
                        last_member_time = loop.time()

                    # Progress update so we know it's not frozen; printed once
                    # per _PROGRESS_STEP members rather than on exact multiples.
                    steps = len(members) // _PROGRESS_STEP
                    if steps > reported_steps:
                        reported_steps = steps
                        print(f"Collected {len(members)} members so far...")
            except asyncio.TimeoutError:
                print(f"Timeout - collected {len(members)} members total")
        finally:
            # Always stop the heartbeat, including when an error propagates.
            heartbeat_task.cancel()

    return members


async def download_guild_avatars(guild_id):
    """Collect the members of *guild_id* and download all their avatars.

    Images are written to ``DATA_DIR/avatars_<guild_id>``, which is created
    if it does not exist.
    """
    folder = os.path.join(DATA_DIR, f"avatars_{guild_id}")
    os.makedirs(folder, exist_ok=True)

    print("Fetching guild members via Gateway...")
    members = await get_guild_members_via_gateway(guild_id)
    print(f"\nTotal members found: {len(members)}")

    async with aiohttp.ClientSession() as session:
        tasks = [download_avatar(session, user, folder) for user in members.values()]
        await asyncio.gather(*tasks)

    print(f"All available avatars downloaded for guild {guild_id}!")


def main():
    """Prompt for a guild id and download that guild's avatars."""
    guild_id = input("Enter the guild ID to download avatars from: ").strip()
    # isascii() rejects non-ASCII digit characters that isdigit() accepts.
    if not (guild_id.isascii() and guild_id.isdigit()):
        print("Invalid guild ID. Please enter a numeric ID.")
        return
    asyncio.run(download_guild_avatars(guild_id))


if __name__ == "__main__":
    main()