"""Extract embedded subtitle streams from video files using ffprobe/ffmpeg.

Usage: python extract_subtitles.py <video_file_or_folder>

For every subtitle stream found in a video, a file named
``<video>.<lang>.<ext>`` is written next to the video.
"""

import glob
import json
import os
import subprocess
import sys

# File suffixes (lower-case, with leading dot) treated as video files.
VIDEO_EXTENSIONS = ('.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv')

# Map ffprobe codec names to file extensions ffmpeg can pick a muxer from.
CODEC_TO_EXTENSION = {
    'subrip': 'srt',
    'ass': 'ass',
    'webvtt': 'vtt',
    'srt': 'srt',  # In case codec is already named srt
    # mov_text cannot be stream-copied into a standalone file; the copy
    # attempt fails and the re-encoding fallback converts it to SubRip.
    'mov_text': 'srt',
    'hdmv_pgs_subtitle': 'sup',
    # Add more mappings as needed
}


def get_subtitle_streams(video_path: str) -> list:
    """Return ffprobe's description of the subtitle streams in a video.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        The list of stream dicts from ffprobe's JSON output (the
        ``streams`` key), or an empty list when ffprobe is not installed,
        exits with an error, or prints output that is not valid JSON.
    """
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-print_format', 'json',
        '-show_streams',
        '-select_streams', 's',
        video_path,
    ]
    try:
        # Decode explicitly as UTF-8 and replace undecodable bytes so odd
        # metadata cannot raise UnicodeDecodeError under another locale.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            check=True,
        )
    except FileNotFoundError:
        print(f"Error probing video file '{video_path}': "
              "'ffprobe' executable not found.")
        return []
    except subprocess.CalledProcessError as e:
        print(f"Error probing video file '{video_path}': {e.stderr}")
        return []

    try:
        return json.loads(result.stdout).get('streams', [])
    except json.JSONDecodeError as e:
        print(f"Error parsing ffprobe output for '{video_path}': {e}")
        return []


def _run_ffmpeg(video_path, stream_index, output_file, *, copy_codec):
    """Run ffmpeg to write one subtitle stream of ``video_path`` to a file.

    ``-nostdin -y`` keeps ffmpeg from blocking on an invisible "overwrite?"
    prompt when ``output_file`` already exists (its stderr is captured).
    Raises ``subprocess.CalledProcessError`` when ffmpeg exits non-zero and
    ``FileNotFoundError`` when the ffmpeg executable is missing.
    """
    cmd = [
        'ffmpeg', '-nostdin', '-y',
        '-i', video_path,
        '-map', f'0:s:{stream_index}',
    ]
    if copy_codec:
        cmd += ['-c:s', 'copy']
    cmd.append(output_file)
    subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        encoding='utf-8',
        errors='replace',
        check=True,
    )


def extract_subtitles(video_path: str) -> None:
    """Extract every subtitle stream of one video into its own file.

    Each stream is first stream-copied in its original format; if that
    fails, ffmpeg is run again without ``-c:s copy`` so it may convert the
    stream to whatever format the output extension implies. Files are
    written next to the video as ``<video>.<lang>.<ext>``; when two streams
    would produce the same name, later ones get the stream index inserted
    (``<video>.<lang>.<index>.<ext>``) so none is overwritten. Existing
    files with the target name are overwritten.

    Args:
        video_path: Path of the video file to process.
    """
    output_dir = os.path.dirname(video_path) or '.'

    subtitle_streams = get_subtitle_streams(video_path)
    if not subtitle_streams:
        print(f"No subtitle streams found in '{video_path}'.")
        return

    # Base name of the video file (without extension).
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    used_paths = set()
    for index, stream in enumerate(subtitle_streams):
        codec = stream.get('codec_name', 'unknown')
        lang = stream.get('tags', {}).get('language', 'unknown')
        # Use mapped extension if available, otherwise use codec name.
        extension = CODEC_TO_EXTENSION.get(codec, codec)

        output_file = os.path.join(
            output_dir, f"{video_name}.{lang}.{extension}")
        if output_file in used_paths:
            # Same language and format as an earlier stream: disambiguate
            # with the stream index instead of clobbering that file.
            output_file = os.path.join(
                output_dir, f"{video_name}.{lang}.{index}.{extension}")
        used_paths.add(output_file)

        try:
            _run_ffmpeg(video_path, index, output_file, copy_codec=True)
        except FileNotFoundError:
            print("Error: 'ffmpeg' executable not found; "
                  "cannot extract subtitles.")
            return
        except subprocess.CalledProcessError as e:
            print(f"Error extracting subtitle stream {index} from "
                  f"'{video_path}' with copy: {e.stderr}")
        else:
            print(f"Extracted subtitle stream {index} ({lang}, {codec}) "
                  f"to {output_file}")
            continue

        # Fallback: try extracting without copy.
        try:
            _run_ffmpeg(video_path, index, output_file, copy_codec=False)
        except subprocess.CalledProcessError as e:
            print(f"Fallback failed for subtitle stream {index} from "
                  f"'{video_path}': {e.stderr}")
        else:
            print(f"Fallback: Extracted subtitle stream {index} "
                  f"({lang}, {codec}) to {output_file} without copy")


def _find_video_files(folder):
    """Return the sorted video files directly inside ``folder``.

    The folder name is escaped so glob metacharacters in it (``[``, ``*``,
    ``?``) are matched literally, and the suffix test is case-insensitive
    so it agrees with the single-file check in ``process_input``.
    """
    candidates = glob.glob(os.path.join(glob.escape(folder), '*'))
    return sorted(
        path for path in candidates
        if path.lower().endswith(VIDEO_EXTENSIONS) and os.path.isfile(path)
    )


def process_input(input_path: str) -> None:
    """Process a single video file or every video file in a folder.

    Args:
        input_path: Path to a video file, or to a folder whose direct
            children are scanned (not recursively) for video files.
    """
    if os.path.isfile(input_path):
        if input_path.lower().endswith(VIDEO_EXTENSIONS):
            extract_subtitles(input_path)
        else:
            print(f"Skipping '{input_path}': "
                  "Not a recognized video file extension.")
        return

    if not os.path.isdir(input_path):
        print(f"Error: '{input_path}' is neither a valid file "
              "nor a directory.")
        return

    video_files = _find_video_files(input_path)
    if not video_files:
        print(f"No video files found in folder '{input_path}'.")
        return

    for video_file in video_files:
        print(f"\nProcessing '{video_file}'...")
        extract_subtitles(video_file)


def main() -> None:
    """Command-line entry point: validate ``sys.argv`` and process the path.

    Exits with status 1 when no path is given or the path does not exist.
    """
    if len(sys.argv) < 2:
        print("Usage: python extract_subtitles.py <video_file_or_folder>")
        sys.exit(1)

    input_path = sys.argv[1]
    if not os.path.exists(input_path):
        print(f"Error: Path '{input_path}' does not exist.")
        sys.exit(1)

    process_input(input_path)


if __name__ == "__main__":
    main()