# discord_tools/scripts/image_downloader.py
"""Download the images posted in a Discord channel to a local directory."""

import os
import re
import sys
import requests
from datetime import datetime
from urllib.parse import urlsplit

# Add the parent directory to the Python path
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(script_dir))
sys.path.insert(0, project_root)

from discord_tools.utils.api_utils import make_discord_request
from discord_tools.config.settings import ERROR_MESSAGES

# Image file extensions to look for
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp')

# Characters that are rejected in file names by at least one common filesystem
# (path separators, Windows-reserved punctuation, ASCII control characters).
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def _filename_from_url(url):
    """
    Return the last path segment of a URL, without query string or fragment.

    :param url: The URL to take the file name from
    :return: The final path component, or 'unknown' if the path has none
    """
    return os.path.basename(urlsplit(url).path) or 'unknown'


def _sanitize_filename(filename):
    """
    Replace characters that cannot safely appear in a file name.

    :param filename: Candidate file name (not a path)
    :return: The name with unsafe characters replaced by underscores and
             trailing dots/spaces removed
    """
    return _UNSAFE_FILENAME_CHARS.sub('_', filename).rstrip(' .')


def fetch_messages(channel_id, before=None, limit=100):
    """
    Fetch messages from a Discord channel.

    :param channel_id: The channel ID to fetch messages from
    :param before: Message ID to fetch messages before (for pagination)
    :param limit: Number of messages to fetch (max 100)
    :return: Decoded JSON body of the response (a list of messages), or None
             if make_discord_request returned a falsy response
    """
    endpoint = f"/channels/{channel_id}/messages"
    params = {"limit": limit}
    if before:
        params["before"] = before

    response = make_discord_request('GET', endpoint, params=params)
    if response:
        return response.json()
    return None


def extract_images_from_messages(messages, user_id=None):
    """
    Extract all image URLs from a list of messages.

    Attachments are included when their file name ends with one of
    IMAGE_EXTENSIONS. For embeds, the thumbnail of an embed whose type is
    'image' and the 'image' field of any embed are included.

    :param messages: List of Discord message objects (dicts)
    :param user_id: Optional user ID to filter messages by
    :return: List of tuples (image_url, filename, message_id, timestamp)
    """
    images = []
    # Compare IDs as strings so that an integer user_id still matches.
    wanted_author = str(user_id) if user_id else None

    for message in messages:
        # Filter by user if specified
        if wanted_author is not None:
            author = message.get('author') or {}
            if str(author.get('id')) != wanted_author:
                continue

        message_id = message.get('id')
        timestamp = message.get('timestamp', '')

        # Check attachments
        for attachment in message.get('attachments', []):
            url = attachment.get('url')
            filename = attachment.get('filename') or 'unknown'
            # Check if it's an image
            if url and filename.lower().endswith(IMAGE_EXTENSIONS):
                images.append((url, filename, message_id, timestamp))

        # Check embeds for images
        for embed in message.get('embeds', []):
            media_entries = []
            # Embed of type "image": the picture is exposed as the thumbnail
            if embed.get('type') == 'image' and embed.get('thumbnail'):
                media_entries.append(embed['thumbnail'])
            # Embed image field
            if embed.get('image'):
                media_entries.append(embed['image'])

            for media in media_entries:
                url = media.get('url')
                if url:
                    # Take the name from the URL path only; the query string
                    # would otherwise end up inside the file name/extension.
                    filename = f"embed_{message_id}_{_filename_from_url(url)}"
                    images.append((url, filename, message_id, timestamp))

    return images


def download_image(url, filepath):
    """
    Download an image from a URL to a local file.

    :param url: Image URL
    :param filepath: Local file path to save the image
    :return: True if successful, False otherwise
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and let the caller continue.
        print(f"Failed to download {url}: {e}")
        return False
    return True


def download_all_images(channel_id, output_dir=None, user_id=None):
    """
    Download all images from a Discord channel.

    :param channel_id: The channel ID to download images from
    :param output_dir: Directory to save images (defaults to
                       data/images/{channel_id} under the directory one level
                       above this script's directory)
    :param user_id: Optional user ID to filter images by specific user
    :return: Number of images downloaded
    """
    page_size = 100

    # Set up output directory
    if output_dir is None:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        base_dir = os.path.dirname(scripts_dir)
        output_dir = os.path.join(base_dir, "data", "images", str(channel_id))

        # Add user ID to path if filtering by user
        if user_id:
            output_dir = os.path.join(output_dir, f"user_{user_id}")

    os.makedirs(output_dir, exist_ok=True)

    if user_id:
        print(f"Fetching messages from channel {channel_id} (filtering by user {user_id})...")
    else:
        print(f"Fetching messages from channel {channel_id}...")

    all_images = []
    before = None
    total_messages = 0

    # Fetch all messages with pagination
    while True:
        messages = fetch_messages(channel_id, before=before, limit=page_size)

        # None means the request failed; an empty list means no more messages.
        if messages is None:
            if total_messages == 0:
                print(ERROR_MESSAGES.get("api_error", "Failed to fetch messages"))
                return 0
            # Keep what was collected so far rather than discarding it.
            print(f"Warning: request failed after {total_messages} messages; "
                  "continuing with the messages fetched so far.")
            break

        if not messages:
            break

        total_messages += len(messages)
        print(f"Fetched {total_messages} messages so far...")

        # Extract images from these messages
        all_images.extend(extract_images_from_messages(messages, user_id))

        # Set before to the last message ID for pagination
        before = messages[-1]['id']

        # A short page means we've reached the end
        if len(messages) < page_size:
            break

    print(f"\nFound {len(all_images)} images in {total_messages} messages")

    if not all_images:
        print("No images to download.")
        return 0

    # Download all images
    print(f"\nDownloading images to {output_dir}...\n")
    downloaded = 0

    for i, (url, filename, message_id, _timestamp) in enumerate(all_images, 1):
        # Make the name unique with a running index and the message ID, and
        # strip anything that could escape output_dir or be rejected by the
        # filesystem.
        safe_filename = _sanitize_filename(f"{i:04d}_{message_id}_{filename}")
        filepath = os.path.join(output_dir, safe_filename)

        print(f"[{i}/{len(all_images)}] Downloading {filename}...")
        if download_image(url, filepath):
            downloaded += 1

    print(f"\nSuccessfully downloaded {downloaded}/{len(all_images)} images")
    print(f"Images saved to: {os.path.abspath(output_dir)}")

    return downloaded


def main():
    """Prompt for a channel ID, optional user filter and output directory, then download."""
    print("Discord Image Downloader")
    print("=" * 50)

    channel_id = input("Enter the channel ID: ").strip()
    if not channel_id:
        print("Error: Channel ID cannot be empty")
        return

    user_id = input("Enter user ID to filter by (press Enter to download from all users): ").strip()
    user_id = user_id if user_id else None

    custom_dir = input("Enter output directory (press Enter for default): ").strip()
    output_dir = custom_dir if custom_dir else None

    print()
    download_all_images(channel_id, output_dir, user_id)


if __name__ == "__main__":
    main()