210 lines
6.9 KiB
Python
Executable File
210 lines
6.9 KiB
Python
Executable File
# discord_tools/scripts/image_downloader.py
|
|
|
|
import os
|
|
import sys
|
|
import requests
|
|
from datetime import datetime
|
|
|
|
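# Add the directory two levels above this script's directory to sys.path so the `discord_tools` imports below resolve when this file is run directly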
|
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
project_root = os.path.dirname(os.path.dirname(script_dir))
|
|
sys.path.insert(0, project_root)
|
|
|
|
from discord_tools.utils.api_utils import make_discord_request
|
|
from discord_tools.config.settings import ERROR_MESSAGES
|
|
|
|
# Image file extensions to look for
|
|
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp')
|
|
|
|
def fetch_messages(channel_id, before=None, limit=100):
    """
    Retrieve a single page of messages from a Discord channel.

    :param channel_id: ID of the channel whose messages are requested
    :param before: Message ID used as the pagination cursor; only sent when truthy
    :param limit: Number of messages to request (max 100)
    :return: Decoded JSON body of the response, or None if the request helper
             returned a falsy result
    """
    # "before" is only included in the query when a cursor was supplied.
    query = {"limit": limit, "before": before} if before else {"limit": limit}

    reply = make_discord_request('GET', f"/channels/{channel_id}/messages", params=query)

    return reply.json() if reply else None
|
|
|
|
def _embed_filename(message_id, url):
    """
    Build a local filename for an embed image from the last path segment of its URL.

    The fragment and query string are removed before the last segment is taken,
    so signed CDN parameters (``?ex=...&is=...``) neither end up in the filename
    nor cause the wrong segment to be picked when they contain a ``/``.
    """
    path_part = url.partition('#')[0].partition('?')[0]
    basename = path_part.rsplit('/', 1)[-1]
    # A URL ending in "/" has no last segment; fall back to a generic name.
    return f"embed_{message_id}_{basename or 'image'}"


def extract_images_from_messages(messages, user_id=None):
    """
    Extract all image URLs from a list of messages.

    Attachments are kept when their filename ends with one of IMAGE_EXTENSIONS
    (case-insensitive). Embeds contribute their ``thumbnail`` URL when the embed
    type is ``image``, and their ``image`` URL whenever one is present.

    :param messages: List of Discord message objects (dicts)
    :param user_id: Optional user ID; when given, only messages whose author ID
                    equals it are considered
    :return: List of tuples (image_url, filename, message_id, timestamp)
    """
    images = []

    for message in messages:
        # "or {}" also covers an explicit null author, not just a missing key.
        author = message.get('author') or {}
        if user_id and author.get('id') != user_id:
            continue

        message_id = message.get('id')
        timestamp = message.get('timestamp', '')

        # Attachments: keep those whose filename has an image extension.
        for attachment in message.get('attachments') or []:
            url = attachment.get('url')
            filename = attachment.get('filename') or 'unknown'

            if url and filename.lower().endswith(IMAGE_EXTENSIONS):
                images.append((url, filename, message_id, timestamp))

        # Embeds: image-type embeds expose the picture as a thumbnail, other
        # embeds may carry it in the "image" field.
        for embed in message.get('embeds') or []:
            if embed.get('type') == 'image' and embed.get('thumbnail'):
                url = embed['thumbnail'].get('url')
                if url:
                    images.append((url, _embed_filename(message_id, url), message_id, timestamp))

            if embed.get('image'):
                url = embed['image'].get('url')
                if url:
                    images.append((url, _embed_filename(message_id, url), message_id, timestamp))

    return images
|
|
|
|
def download_image(url, filepath):
    """
    Fetch an image over HTTP and store it at the given path.

    Any failure (request error, non-success status, file write error) is
    reported on stdout and turned into a False result instead of raising.

    :param url: Image URL
    :param filepath: Local file path to save the image
    :return: True if successful, False otherwise
    """
    try:
        reply = requests.get(url, timeout=30)
        # Treat HTTP error statuses as failures rather than saving an error page.
        reply.raise_for_status()

        with open(filepath, 'wb') as target:
            target.write(reply.content)
    except Exception as exc:
        print(f"Failed to download {url}: {exc}")
        return False
    else:
        return True
|
|
|
|
def _sanitize_filename(name):
    """Replace characters that are invalid in file names on common filesystems with underscores."""
    import re

    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name)
    return cleaned or 'unnamed'


def download_all_images(channel_id, output_dir=None, user_id=None):
    """
    Download all images from a Discord channel.

    Pages through the channel history with fetch_messages, collects image
    attachments/embeds with extract_images_from_messages and saves each one
    with download_image. Progress is printed to stdout.

    :param channel_id: The channel ID to download images from
    :param output_dir: Directory to save images; when None, defaults to
                       <parent of this script's directory>/data/images/<channel_id>
                       (plus a user_<user_id> sub-folder when filtering by user)
    :param user_id: Optional user ID to filter images by specific user
    :return: Number of images downloaded (0 when the first fetch fails or no
             images are found)
    """
    # One value drives both the request size and the end-of-history check.
    page_size = 100

    # Set up output directory
    if output_dir is None:
        package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # str() so a numeric channel ID does not break os.path.join.
        output_dir = os.path.join(package_dir, "data", "images", str(channel_id))

        # NOTE(review): the per-user sub-folder is applied to the default
        # location only; an explicitly supplied output_dir is used as given.
        # Confirm this matches the intended layout.
        if user_id:
            output_dir = os.path.join(output_dir, f"user_{user_id}")

    os.makedirs(output_dir, exist_ok=True)

    if user_id:
        print(f"Fetching messages from channel {channel_id} (filtering by user {user_id})...")
    else:
        print(f"Fetching messages from channel {channel_id}...")

    all_images = []
    before = None
    total_messages = 0

    # Fetch all messages with pagination
    while True:
        messages = fetch_messages(channel_id, before=before, limit=page_size)

        # Anything that is not a list (None on a failed request, or an
        # unexpected payload) is a fetch failure -- distinct from an empty
        # page, which simply means there is nothing (more) to read.
        if not isinstance(messages, list):
            if total_messages == 0:
                print(ERROR_MESSAGES.get("api_error", "Failed to fetch messages"))
                return 0
            print("Warning: failed to fetch further messages; continuing with those retrieved so far.")
            break

        if not messages:
            break

        total_messages += len(messages)
        print(f"Fetched {total_messages} messages so far...")

        # Extract images from these messages
        all_images.extend(extract_images_from_messages(messages, user_id))

        # The last message of the page is the cursor for the next request.
        before = messages[-1]['id']

        # A short page means the end of the history has been reached.
        if len(messages) < page_size:
            break

    print(f"\nFound {len(all_images)} images in {total_messages} messages")

    if not all_images:
        print("No images to download.")
        return 0

    # Download all images
    print(f"\nDownloading images to {output_dir}...\n")

    total_images = len(all_images)
    downloaded = 0
    for index, (url, filename, message_id, _timestamp) in enumerate(all_images, 1):
        # The running index and message ID keep names unique; the result is
        # sanitized because filenames originate from remote data.
        safe_filename = _sanitize_filename(f"{index:04d}_{message_id}_{filename}")
        filepath = os.path.join(output_dir, safe_filename)

        print(f"[{index}/{total_images}] Downloading {filename}...")

        if download_image(url, filepath):
            downloaded += 1

    print(f"\nSuccessfully downloaded {downloaded}/{total_images} images")
    print(f"Images saved to: {os.path.abspath(output_dir)}")

    return downloaded
|
|
|
|
def main():
    """
    Interactive entry point.

    Asks for a channel ID (required), an optional user ID filter and an
    optional output directory, then hands them to download_all_images.
    """
    print("Discord Image Downloader")
    print("=" * 50)

    target_channel = input("Enter the channel ID: ").strip()
    if not target_channel:
        print("Error: Channel ID cannot be empty")
        return

    # Blank answers become None so the downloader falls back to its defaults.
    author_filter = input("Enter user ID to filter by (press Enter to download from all users): ").strip() or None
    destination = input("Enter output directory (press Enter for default): ").strip() or None

    print()
    download_all_images(target_channel, destination, author_filter)


# Start the interactive prompt only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|