# discord_tools/scripts/video_fetcher.py

# discord_tools/scripts/video_downloader.py

import os
import re
import sys
from datetime import datetime
from urllib.parse import urlsplit

import requests

# Put the directory two levels above this script's directory at the front of
# the Python path. This has to run before the `discord_tools` imports below,
# presumably so they resolve when the file is executed directly as a script.
script_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
project_root = os.path.dirname(os.path.dirname(script_dir))  # grandparent of script_dir
sys.path.insert(0, project_root)  # index 0 so it is searched first

from discord_tools.utils.api_utils import make_discord_request
from discord_tools.config.settings import ERROR_MESSAGES

# Lowercase file suffixes (leading dot included) that are treated as video files.
# Kept as a tuple so it can be passed straight to str.endswith().
VIDEO_EXTENSIONS = (
    '.mp4',
    '.mov',
    '.avi',
    '.mkv',
    '.webm',
    '.flv',
    '.wmv',
    '.m4v',
    '.mpeg',
    '.mpg',
    '.3gp',
    '.ogv',
)

def fetch_messages(channel_id, before=None, limit=100):
    """
    Request one page of messages from a Discord channel.

    :param channel_id: ID of the channel whose messages are requested
    :param before: When truthy, sent as the "before" query parameter so the
                   page contains messages preceding that message ID
    :param limit: Page size sent as the "limit" query parameter (max 100)
    :return: The decoded JSON body, or None when the request yields a falsy response
    """
    query = {"limit": limit}
    if before:
        query["before"] = before

    response = make_discord_request(
        'GET',
        f"/channels/{channel_id}/messages",
        params=query,
    )

    # Guard clause: a falsy response means there is nothing to decode.
    if not response:
        return None
    return response.json()

def extract_videos_from_messages(messages, user_id=None):
    """
    Extract all video URLs from a list of messages.

    An attachment counts as a video when its filename ends with one of
    VIDEO_EXTENSIONS (case-insensitive). An embed counts when its type is
    'video' and it carries a video URL; its filename is built from the last
    path segment of that URL, ignoring any query string or fragment.

    :param messages: List of Discord message objects (dicts); None or an
                     empty list yields an empty result
    :param user_id: Optional user ID to filter messages by. Compared as a
                    string, so an int ID matches the string IDs in messages
    :return: List of tuples (video_url, filename, message_id, timestamp)
    """
    videos = []
    # Normalise once so the comparison below is type-insensitive.
    wanted_author = str(user_id) if user_id else None

    for message in messages or []:
        # Filter by user if specified
        if wanted_author is not None:
            # "author" may be absent or explicitly null.
            author_id = (message.get('author') or {}).get('id')
            if author_id is None or str(author_id) != wanted_author:
                continue

        message_id = message.get('id')
        timestamp = message.get('timestamp', '')

        # Check attachments
        for attachment in message.get('attachments') or []:
            url = attachment.get('url')
            # `or` also covers a filename key that is present but null.
            filename = attachment.get('filename') or 'unknown'

            if url and filename.lower().endswith(VIDEO_EXTENSIONS):
                videos.append((url, filename, message_id, timestamp))

        # Check embeds for videos
        for embed in message.get('embeds') or []:
            video = embed.get('video')
            if embed.get('type') != 'video' or not video:
                continue

            url = video.get('url')
            if not url:
                continue

            # Use only the URL path: a query string such as "?ex=...&hm=..."
            # must not leak into the filename, and a trailing slash must not
            # produce an empty name.
            basename = urlsplit(url).path.rstrip('/').rsplit('/', 1)[-1] or 'video'
            videos.append((url, f"embed_{message_id}_{basename}", message_id, timestamp))

    return videos

def download_video(url, filepath):
    """
    Download a video from a URL to a local file.

    The body is streamed into a temporary "<filepath>.part" file which is
    moved onto filepath only after the whole transfer succeeded. A failed or
    interrupted download therefore never leaves a truncated file at filepath
    and never destroys a file that already existed there.

    :param url: Video URL
    :param filepath: Local file path to save the video
    :return: True if successful, False otherwise
    """
    partial_path = f"{filepath}.part"

    try:
        print(f"  Downloading from {url}...")

        # Using the response as a context manager closes the streamed
        # connection even when the transfer fails part-way through.
        with requests.get(url, timeout=60, stream=True) as response:
            response.raise_for_status()

            # Download in chunks for large files
            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            percent = (downloaded / total_size) * 100
                            print(f"  Progress: {percent:.1f}%", end='\r')

        if total_size > 0:
            print("  Progress: 100.0%")

        # Publish the finished file in one step.
        os.replace(partial_path, filepath)
        return True
    except Exception as e:
        # Deliberately broad: this is the per-file boundary, and one bad
        # download must not abort a whole batch.
        print(f"  Failed to download {url}: {e}")
        return False
    finally:
        # After a successful rename the temp file is gone and this is a no-op;
        # on any failure (including Ctrl+C) it discards the incomplete data.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass

def _sanitize_filename(filename):
    """Return filename with path separators, reserved and control characters replaced by '_'."""
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename or '')
    return cleaned or 'unknown'


def download_all_videos(channel_id, output_dir=None, user_id=None):
    """
    Download all videos from a Discord channel.

    Pages through the channel history with fetch_messages(), collects video
    attachments/embeds with extract_videos_from_messages(), then downloads
    each one with download_video(). Files are named
    "<index>_<message_id>_<sanitized original name>".

    :param channel_id: The channel ID to download videos from
    :param output_dir: Directory to save videos. Defaults to
                       data/videos/{channel_id} under the parent of this
                       script's directory, plus user_{user_id} when filtering
    :param user_id: Optional user ID to filter videos by specific user
    :return: Number of videos downloaded (0 when the first fetch fails or
             no videos are found)
    """
    page_size = 100  # page size requested from fetch_messages

    # Set up output directory
    if output_dir is None:
        # NOTE(review): this resolves one level above the script directory,
        # whereas the module-level project_root is two levels above - confirm
        # which location is intended for the data folder.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        base_dir = os.path.dirname(script_dir)
        # str() so an integer channel ID does not break os.path.join.
        output_dir = os.path.join(base_dir, "data", "videos", str(channel_id))

        # Add user ID to path if filtering by user
        if user_id:
            output_dir = os.path.join(output_dir, f"user_{user_id}")

    os.makedirs(output_dir, exist_ok=True)

    if user_id:
        print(f"Fetching messages from channel {channel_id} (filtering by user {user_id})...")
    else:
        print(f"Fetching messages from channel {channel_id}...")

    all_videos = []
    before = None
    total_messages = 0

    # Fetch all messages with pagination
    while True:
        messages = fetch_messages(channel_id, before=before, limit=page_size)

        # None means the request failed; an empty list just means there are
        # no (more) messages. Only the former is an error.
        if messages is None:
            if total_messages == 0:
                print(ERROR_MESSAGES.get("api_error", "Failed to fetch messages"))
                return 0
            print(f"Warning: failed to fetch more messages after {total_messages}; "
                  "continuing with what was fetched")
            break

        if not messages:
            break

        total_messages += len(messages)
        print(f"Fetched {total_messages} messages so far...")

        # Extract videos from these messages
        all_videos.extend(extract_videos_from_messages(messages, user_id))

        # The last message of this page is the cursor for the next page.
        before = messages[-1]['id']

        # A short page means we've reached the end
        if len(messages) < page_size:
            break

    total_videos = len(all_videos)
    print(f"\nFound {total_videos} videos in {total_messages} messages")

    if not all_videos:
        print("No videos to download.")
        return 0

    # Download all videos
    print(f"\nDownloading videos to {output_dir}...\n")

    downloaded = 0
    for i, (url, filename, message_id, _timestamp) in enumerate(all_videos, 1):
        # The index and message ID keep names unique; the remote-supplied
        # name is sanitized so it stays a single path component.
        safe_filename = f"{i:04d}_{message_id}_{_sanitize_filename(filename)}"
        filepath = os.path.join(output_dir, safe_filename)

        print(f"[{i}/{total_videos}] Downloading {filename}...")

        if download_video(url, filepath):
            downloaded += 1
            file_size = os.path.getsize(filepath) / (1024 * 1024)  # MB
            print(f"  Saved: {safe_filename} ({file_size:.2f} MB)\n")

    print(f"\nSuccessfully downloaded {downloaded}/{total_videos} videos")
    print(f"Videos saved to: {os.path.abspath(output_dir)}")

    return downloaded

def main():
    """
    Interactive entry point.

    Prompts for a channel ID (required), an optional user ID filter and an
    optional output directory, then hands them to download_all_videos().
    Blank answers to the optional prompts are passed on as None.
    """
    print("Discord Video Downloader")
    print("=" * 50)

    channel_id = input("Enter the channel ID: ").strip()
    if channel_id == "":
        print("Error: Channel ID cannot be empty")
        return

    # An empty answer collapses to None via `or`.
    user_id = input("Enter user ID to filter by (press Enter to download from all users): ").strip() or None
    output_dir = input("Enter output directory (press Enter for default): ").strip() or None

    print()
    download_all_videos(channel_id, output_dir, user_id)

# Start the interactive prompt only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()