Files
discord_tools/scripts/pfp_downloader.py

204 lines
8.0 KiB
Python
Executable File

# discord_tools/scripts/pfp_downloader.py
import os
import sys
import asyncio
import aiohttp
import websockets
import json
from urllib.parse import urlparse
# Make the absolute `discord_tools.*` imports below resolvable when this file
# is run directly as a script: take this file's directory, go two levels up,
# and put that directory at the front of sys.path.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(script_dir))
sys.path.insert(0, project_root)
from discord_tools.config.settings import DISCORD_TOKEN, DEFAULT_AVATAR_SIZE, DATA_DIR
from discord_tools.utils.api_utils import make_discord_request
# Translation table for file-name sanitising: path separators, characters that
# are reserved in Windows file names, and ASCII control characters all map to
# an underscore.
_UNSAFE_FILENAME_CHARS = {
    ord(ch): "_" for ch in '<>:"/\\|?*' + "".join(map(chr, range(32)))
}


async def download_avatar(session, user, folder):
    """Download one user's avatar as a PNG into ``folder``.

    Args:
        session: An object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``read()``
            (an ``aiohttp.ClientSession`` in this script).
        user: User dict; reads ``'id'``, ``'username'`` and ``'avatar'``.
        folder: Existing directory the image is written into.

    Returns:
        None. The outcome is reported on stdout.

    The file is named ``<username>_<id>.png``. The username comes from the
    remote service and is untrusted, so any path separator or other
    file-system-hostile character in the stem is replaced with ``_``; without
    that, a name such as ``../../x`` would write outside ``folder``.
    """
    username = user['username']

    if not user.get('avatar'):
        print(f"No avatar found for {username}")
        return

    avatar_url = (
        f"https://cdn.discordapp.com/avatars/{user['id']}/{user['avatar']}.png"
        f"?size={DEFAULT_AVATAR_SIZE}"
    )

    # Sanitise the whole stem (name and id) so nothing in it can act as a
    # path component.
    safe_stem = f"{username}_{user['id']}".translate(_UNSAFE_FILENAME_CHARS)
    filepath = os.path.join(folder, f"{safe_stem}.png")

    async with session.get(avatar_url) as resp:
        if resp.status != 200:
            print(f"Failed to download avatar for {username}")
            return
        payload = await resp.read()

    with open(filepath, 'wb') as f:
        f.write(payload)
    print(f"Downloaded avatar for {username}")
def get_guild_info(guild_id):
    """Fetch the guild object for ``guild_id`` with member counts included.

    Returns the decoded JSON body on success. When ``make_discord_request``
    returns a falsy value, a failure message is printed and ``None`` is
    returned instead.
    """
    # with_counts=true is requested so the caller can read an approximate
    # member count from the result.
    result = make_discord_request('GET', f'/guilds/{guild_id}?with_counts=true')
    if result:
        return result.json()

    print(f"Failed to fetch guild info for {guild_id}")
    return None
def _build_member_ranges(member_count, chunk_size=100):
    """Return ``[start, end]`` index windows covering the member list.

    Every window is exactly ``chunk_size`` wide and one extra window is added
    past ``member_count``. The list can contain rows that are not members
    (rows without a ``'member'`` key are skipped during collection), so the
    row count may exceed the member count. Ends are deliberately not clamped
    to ``member_count``: clamping made the final window inverted (for example
    ``[300, 250]``) whenever the count was not a multiple of ``chunk_size``.
    """
    return [
        [start, start + chunk_size - 1]
        for start in range(0, member_count + chunk_size, chunk_size)
    ]


def _users_from_member_list_ops(ops):
    """Yield the user dict of every member row in SYNC and INSERT ops."""
    for op in ops:
        kind = op.get('op')
        if kind == 'SYNC':
            # SYNC ops carry a whole window of rows.
            rows = op.get('items', [])
        elif kind == 'INSERT':
            # INSERT ops carry a single row.
            rows = [op.get('item', {})]
        else:
            continue
        for row in rows:
            if 'member' not in row:
                continue
            user = row['member'].get('user')
            if user:
                yield user


async def get_guild_members_via_gateway(guild_id):
    """Collect a guild's members by subscribing to its member list over the Gateway.

    Steps: read the approximate member count via ``get_guild_info`` (falling
    back to 200), connect and identify with ``DISCORD_TOKEN``, wait for READY,
    send an op 14 subscription for the first channel returned by
    ``/guilds/{guild_id}/channels``, then gather users from
    ``GUILD_MEMBER_LIST_UPDATE`` events until 20 seconds pass without a member
    row arriving.

    NOTE(review): the op 14 payload shape is kept exactly as the original code
    sent it; it is not among the opcodes listed in the public Gateway
    documentation, so confirm it against current behaviour.

    Args:
        guild_id: Guild ID as a string.

    Returns:
        dict mapping user id -> user dict. Empty if nothing was received or no
        channel could be found to subscribe through.
    """
    members = {}
    gateway_url = "wss://gateway.discord.gg/?v=9&encoding=json"

    # Figure out how many members exist so we know how many windows to request.
    guild_info = get_guild_info(guild_id)
    if not guild_info:
        print("Could not get guild info, using default member count estimate")
        approximate_member_count = 200
    else:
        approximate_member_count = guild_info.get('approximate_member_count', 200)
        print(f"Guild has approximately {approximate_member_count} members")

    async with websockets.connect(gateway_url, max_size=16 * 1024 * 1024) as ws:
        # The first frame is HELLO and carries the heartbeat interval (ms).
        hello = json.loads(await ws.recv())
        heartbeat_interval = hello['d']['heartbeat_interval']

        # Last sequence number seen on any frame; echoed back in heartbeats.
        last_sequence = None

        identify = {
            "op": 2,
            "d": {
                "token": DISCORD_TOKEN,
                "properties": {
                    "$os": "windows",
                    "$browser": "chrome",
                    "$device": "pc"
                },
                "compress": False
            }
        }
        await ws.send(json.dumps(identify))

        async def heartbeat():
            # Keep the connection alive; "d" is the latest sequence number,
            # or null until one has been received.
            while True:
                await asyncio.sleep(heartbeat_interval / 1000)
                await ws.send(json.dumps({"op": 1, "d": last_sequence}))

        heartbeat_task = asyncio.create_task(heartbeat())
        try:
            # Wait for the READY dispatch before subscribing.
            while True:
                msg = json.loads(await ws.recv())
                if msg.get('s') is not None:
                    last_sequence = msg['s']
                if msg.get('t') == 'READY':
                    break
            print("Connected to Discord Gateway")

            # Build ranges for member scraping (100 rows per window).
            ranges = _build_member_ranges(approximate_member_count, 100)
            print(f"Requesting member list in {len(ranges)} chunks...")

            # The subscription is keyed by channel ID, so one is required.
            channels_response = make_discord_request(
                'GET', f'/guilds/{guild_id}/channels'
            )
            channels = channels_response.json() if channels_response else []
            if not channels:
                # Without a channel there is nothing to subscribe through;
                # return now instead of idling until the timeout.
                print("No channel available for member scraping")
                return members

            first_channel_id = channels[0]['id']
            print(f"Using channel {first_channel_id} for member scraping")
            subscribe = {
                "op": 14,
                "d": {
                    "guild_id": guild_id,
                    "typing": False,
                    "activities": False,
                    "threads": False,
                    "members": [],
                    "channels": {first_channel_id: ranges}
                }
            }
            await ws.send(json.dumps(subscribe))
            print(f"Sent subscription request")

            # Collect members until `timeout` seconds pass with no member row.
            timeout = 20
            loop = asyncio.get_running_loop()
            last_member_time = loop.time()
            last_reported = 0
            try:
                while True:
                    remaining_timeout = timeout - (loop.time() - last_member_time)
                    if remaining_timeout <= 0:
                        print("Timeout - no new members received")
                        break

                    raw = await asyncio.wait_for(ws.recv(), timeout=remaining_timeout)
                    data = json.loads(raw)
                    if data.get('s') is not None:
                        last_sequence = data['s']
                    if data.get('t') != 'GUILD_MEMBER_LIST_UPDATE':
                        continue

                    for user in _users_from_member_list_ops(data['d'].get('ops', [])):
                        members[user['id']] = user
                        last_member_time = loop.time()

                    # Progress update so we know it's not frozen: report each
                    # time at least 50 more members have been collected.
                    if len(members) - last_reported >= 50:
                        last_reported = len(members)
                        print(f"Collected {len(members)} members so far...")
            except asyncio.TimeoutError:
                print(f"Timeout - collected {len(members)} members total")
        finally:
            # Always stop the heartbeat, including when recv()/send() raised.
            # Awaiting it through gather() retrieves its result so a heartbeat
            # that died with the connection is not reported a second time.
            heartbeat_task.cancel()
            await asyncio.gather(heartbeat_task, return_exceptions=True)

    return members
async def download_guild_avatars(guild_id):
    """Download the avatar of every member of ``guild_id``.

    Creates ``<DATA_DIR>/avatars_<guild_id>``, collects the member list via
    ``get_guild_members_via_gateway``, then runs one ``download_avatar`` call
    per member concurrently over a shared aiohttp session.

    A download that raises does not abort the batch: the exception is
    collected and reported per user once all downloads have finished.

    Args:
        guild_id: Guild ID as a string.
    """
    folder = os.path.join(DATA_DIR, f"avatars_{guild_id}")
    os.makedirs(folder, exist_ok=True)

    print("Fetching guild members via Gateway...")
    members = await get_guild_members_via_gateway(guild_id)
    print(f"\nTotal members found: {len(members)}")

    users = list(members.values())
    async with aiohttp.ClientSession() as session:
        # return_exceptions=True keeps the session open until every download
        # has settled; otherwise the first error would propagate and close the
        # session underneath the downloads still in flight.
        outcomes = await asyncio.gather(
            *(download_avatar(session, user, folder) for user in users),
            return_exceptions=True,
        )

    for user, outcome in zip(users, outcomes):
        if isinstance(outcome, BaseException):
            label = user.get('username', user.get('id'))
            print(f"Error downloading avatar for {label}: {outcome!r}")

    print(f"All available avatars downloaded for guild {guild_id}!")
def main():
    """Prompt for a guild ID on stdin and download that guild's avatars.

    Prints an error and returns without doing anything when the input is not
    a non-empty string of ASCII digits.
    """
    guild_id = input("Enter the guild ID to download avatars from: ").strip()

    # str.isdigit() on its own also accepts non-ASCII digit characters
    # (superscripts, Arabic-Indic numerals, ...), so require ASCII as well.
    if not (guild_id.isascii() and guild_id.isdigit()):
        print("Invalid guild ID. Please enter a numeric ID.")
        return

    asyncio.run(download_guild_avatars(guild_id))


if __name__ == "__main__":
    main()