204 lines
8.0 KiB
Python
Executable File
204 lines
8.0 KiB
Python
Executable File
# discord_tools/scripts/pfp_downloader.py
|
|
import os
|
|
import sys
|
|
import asyncio
|
|
import aiohttp
|
|
import websockets
|
|
import json
|
|
from urllib.parse import urlparse
|
|
|
|
# Make the directory two levels above this script's own directory importable.
# The header comment places this file at discord_tools/scripts/, so that is the
# directory containing the `discord_tools` package; the absolute
# `discord_tools.*` imports below rely on it when the file is run directly.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(script_dir))
# Inserted at position 0 so this checkout is searched before other sys.path entries.
sys.path.insert(0, project_root)
|
|
|
|
from discord_tools.config.settings import DISCORD_TOKEN, DEFAULT_AVATAR_SIZE, DATA_DIR
|
|
from discord_tools.utils.api_utils import make_discord_request
|
|
|
|
async def download_avatar(session, user, folder):
    """Download one user's avatar as a PNG file into ``folder``.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        user: Discord user dict; reads ``'id'``, ``'username'`` and the
            optional ``'avatar'`` hash.
        folder: Existing directory the image is written to.

    Returns:
        None. The outcome is reported on stdout. Network and file-system
        errors are reported rather than raised, so one failed download does
        not abort a batch that runs many of these under ``asyncio.gather``.
    """
    # Download their pfp. Shocking, I know.
    avatar_hash = user.get('avatar')
    if not avatar_hash:
        print(f"No avatar found for {user['username']}")
        return

    avatar_url = f"https://cdn.discordapp.com/avatars/{user['id']}/{avatar_hash}.png?size={DEFAULT_AVATAR_SIZE}"

    # The username comes from the network and ends up in a file name: replace
    # path separators, characters reserved on Windows and control characters
    # so the write cannot fail on them or land outside `folder`.
    safe_name = "".join(
        "_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch
        for ch in user['username']
    )
    filepath = os.path.join(folder, f"{safe_name}_{user['id']}.png")

    try:
        async with session.get(avatar_url) as resp:
            if resp.status != 200:
                print(f"Failed to download avatar for {user['username']}")
                return
            payload = await resp.read()
        # Write only after the body was fully received, so a broken transfer
        # does not leave a truncated file behind.
        with open(filepath, 'wb') as f:
            f.write(payload)
    except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as exc:
        print(f"Failed to download avatar for {user['username']}: {exc}")
        return

    print(f"Downloaded avatar for {user['username']}")
|
|
|
|
def get_guild_info(guild_id):
    """Fetch the guild object with member counts included.

    Args:
        guild_id: ID of the guild to look up.

    Returns:
        The decoded JSON body of the response, or ``None`` (after printing a
        message) when ``make_discord_request`` returns a falsy result.
    """
    # Get guild info so we know how many people we're dealing with
    guild_response = make_discord_request('GET', f'/guilds/{guild_id}?with_counts=true')
    if guild_response:
        return guild_response.json()

    print(f"Failed to fetch guild info for {guild_id}")
    return None
|
|
|
|
def _build_member_ranges(member_count, chunk_size=100):
    """Split member-list indices ``0..member_count`` into ``[start, end]`` pairs.

    Each pair spans at most ``chunk_size`` indices and always satisfies
    ``start <= end``.
    """
    # The stop value is member_count + 1: stepping up to
    # member_count + chunk_size (as before) produced an inverted final pair
    # such as [200, 199] whenever the count was not a multiple of chunk_size.
    return [
        [start, min(start + chunk_size - 1, member_count)]
        for start in range(0, member_count + 1, chunk_size)
    ]


async def get_guild_members_via_gateway(guild_id):
    """Collect a guild's members by subscribing to its member list on the Gateway.

    Connects to the Discord Gateway, identifies with ``DISCORD_TOKEN``, sends
    an op 14 subscription for the first channel returned by the guild's
    channel listing, and gathers users from the ``SYNC`` and ``INSERT``
    operations of ``GUILD_MEMBER_LIST_UPDATE`` events. Collection stops once
    20 seconds pass without a new member being recorded.

    Args:
        guild_id: ID of the guild whose members are collected.

    Returns:
        Dict mapping user ID to the user dict taken from the member list.
    """
    # Scrape members via gateway because Discord's API is useless for user tokens
    members = {}
    gateway_url = "wss://gateway.discord.gg/?v=9&encoding=json"

    # Figure out how many members exist
    guild_info = get_guild_info(guild_id)
    if not guild_info:
        print("Could not get guild info, using default member count estimate")
        approximate_member_count = 200
    else:
        approximate_member_count = guild_info.get('approximate_member_count', 200)
        print(f"Guild has approximately {approximate_member_count} members")

    # Need a channel ID for this to work. Any channel will do.
    # make_discord_request is a synchronous call, so it is done before the
    # gateway session is opened instead of stalling the event loop (and the
    # heartbeat task) while the socket is live.
    first_channel_id = None
    channels_response = make_discord_request('GET', f'/guilds/{guild_id}/channels')
    if channels_response:
        channels = channels_response.json()
        if channels:
            first_channel_id = channels[0]['id']

    loop = asyncio.get_running_loop()

    async with websockets.connect(gateway_url, max_size=16 * 1024 * 1024) as ws:
        # Get hello, because apparently we need a handshake
        hello = json.loads(await ws.recv())
        heartbeat_interval = hello['d']['heartbeat_interval']

        # Identify ourselves
        identify = {
            "op": 2,
            "d": {
                "token": DISCORD_TOKEN,
                "properties": {
                    "$os": "windows",
                    "$browser": "chrome",
                    "$device": "pc"
                },
                "compress": False
            }
        }
        await ws.send(json.dumps(identify))

        # Keep the connection alive or it'll die on us
        async def heartbeat():
            while True:
                # heartbeat_interval is in milliseconds
                await asyncio.sleep(heartbeat_interval / 1000)
                await ws.send(json.dumps({"op": 1, "d": None}))

        heartbeat_task = asyncio.create_task(heartbeat())

        # try/finally so the heartbeat task is cancelled even when the socket
        # drops or a message fails to parse part-way through.
        try:
            # Wait for ready event
            ready = False
            while not ready:
                msg = json.loads(await ws.recv())
                if msg.get('t') == 'READY':
                    ready = True
                    print("Connected to Discord Gateway")

            # Build ranges for member scraping (100 at a time because Discord)
            ranges = _build_member_ranges(approximate_member_count)
            print(f"Requesting member list in {len(ranges)} chunks...")

            subscribe = {
                "op": 14,
                "d": {
                    "guild_id": guild_id,
                    "typing": False,
                    "activities": False,
                    "threads": False,
                    "members": [],
                    "channels": {}
                }
            }
            if first_channel_id is not None:
                subscribe['d']['channels'][first_channel_id] = ranges
                print(f"Using channel {first_channel_id} for member scraping")

            await ws.send(json.dumps(subscribe))
            print(f"Sent subscription request")

            # Collect members from the member list updates
            timeout = 20
            last_member_time = loop.time()

            try:
                while True:
                    # Only the time since the last recorded member counts,
                    # so unrelated gateway traffic does not keep us waiting.
                    remaining_timeout = timeout - (loop.time() - last_member_time)
                    if remaining_timeout <= 0:
                        print("Timeout - no new members received")
                        break

                    msg = await asyncio.wait_for(ws.recv(), timeout=remaining_timeout)
                    data = json.loads(msg)

                    if data.get('t') != 'GUILD_MEMBER_LIST_UPDATE':
                        continue

                    for op in data['d'].get('ops', []):
                        op_name = op.get('op')
                        if op_name == 'SYNC':
                            # SYNC ops contain the bulk of members
                            items = op.get('items', [])
                        elif op_name == 'INSERT':
                            # INSERT ops are for individual member additions
                            items = [op.get('item', {})]
                        else:
                            continue

                        for item in items:
                            # Items without a 'member' key are skipped.
                            if 'member' not in item:
                                continue
                            user = item['member'].get('user')
                            if user:
                                members[user['id']] = user
                                last_member_time = loop.time()

                    # Progress update so we know it's not frozen
                    if len(members) % 50 == 0 and len(members) > 0:
                        print(f"Collected {len(members)} members so far...")

            except asyncio.TimeoutError:
                print(f"Timeout - collected {len(members)} members total")
        finally:
            heartbeat_task.cancel()

    return members
|
|
|
|
async def download_guild_avatars(guild_id):
    """Download the avatar of every member found in a guild.

    Creates ``avatars_<guild_id>`` under ``DATA_DIR`` if it does not exist,
    collects the members through the Gateway, then downloads all avatars
    concurrently over one shared ``aiohttp`` session.

    Args:
        guild_id: ID of the guild to process.
    """
    # Main function that does the thing
    avatar_dir = os.path.join(DATA_DIR, f"avatars_{guild_id}")
    os.makedirs(avatar_dir, exist_ok=True)

    print("Fetching guild members via Gateway...")
    guild_members = await get_guild_members_via_gateway(guild_id)
    print(f"\nTotal members found: {len(guild_members)}")

    async with aiohttp.ClientSession() as session:
        # One download coroutine per collected user, all awaited together.
        await asyncio.gather(
            *(download_avatar(session, member_user, avatar_dir)
              for member_user in guild_members.values())
        )

    print(f"All available avatars downloaded for guild {guild_id}!")
|
|
|
|
def main():
    """Prompt for a guild ID and download that guild's avatars.

    The stripped input must consist of digits only; anything else prints an
    error message and nothing is downloaded.
    """
    guild_id = input("Enter the guild ID to download avatars from: ").strip()

    if guild_id.isdigit():
        asyncio.run(download_guild_avatars(guild_id))
    else:
        print("Invalid guild ID. Please enter a numeric ID.")
|
|
|
|
# Run the interactive prompt only when executed as a script, not on import.
if __name__ == "__main__":
    main()