"""Gradio front end for fetching and listing YouTube video transcripts."""

import json

import gradio as gr
from youtube_transcript_api import (
    NoTranscriptFound,
    TranscriptsDisabled,
    YouTubeTranscriptApi,
)
from youtube_transcript_api.formatters import (
    JSONFormatter,
    SRTFormatter,
    TextFormatter,
    WebVTTFormatter,
)

# Output format label (as shown in the UI dropdown) -> formatter class.
# Classes are stored rather than instances so only the selected formatter
# is constructed per request.
_FORMATTERS = {
    "Text": TextFormatter,
    "JSON": JSONFormatter,
    "WebVTT": WebVTTFormatter,
    "SRT": SRTFormatter,
}

_DEFAULT_LANGUAGES = ["en"]


def _parse_languages(languages):
    """Turn a comma-separated string (or a sequence) into a list of codes.

    Whitespace around each code is stripped and empty entries are dropped,
    so input such as ``"en, de,"`` yields ``["en", "de"]``. Falls back to
    ``["en"]`` when nothing usable remains.
    """
    if not languages:
        return list(_DEFAULT_LANGUAGES)
    parts = languages.split(",") if isinstance(languages, str) else languages
    codes = [str(part).strip() for part in parts]
    codes = [code for code in codes if code]
    return codes or list(_DEFAULT_LANGUAGES)


def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a video's transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID.
        languages: Comma-separated language codes in priority order; an empty
            value means ``"en"``.
        format_type: One of ``"Text"``, ``"JSON"``, ``"WebVTT"``, ``"SRT"``.
        translate_to: Language code to translate into, or an empty value for
            no translation.
        preserve_formatting: Passed through to the transcript fetch.

    Returns:
        The formatted transcript, or an error message string. Exceptions are
        converted to messages because the result is shown directly in the UI.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    # Validate before any network access so a bad format never costs a fetch.
    formatter_cls = _FORMATTERS.get(format_type)
    if formatter_cls is None:
        return f"Error: Unsupported format '{format_type}'"

    language_codes = _parse_languages(languages)

    try:
        # Resolve the transcript once; translating wraps the same transcript,
        # so the data is downloaded a single time and preserve_formatting is
        # honoured whether or not a translation was requested.
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(language_codes)
        if translate_to:
            transcript = transcript.translate(translate_to)
        transcript_data = transcript.fetch(
            preserve_formatting=bool(preserve_formatting)
        )
        return formatter_cls().format_transcript(transcript_data)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Unexpected error: {str(e)}"


def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID.

    Returns:
        A JSON array (indented by 2) with one object per transcript holding
        its language, code, generated/translatable flags and translation
        languages; or an error message string on failure.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": transcript.translation_languages,
            }
            for transcript in transcript_list
        ]
        return json.dumps(transcripts_info, indent=2)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    with gr.Tab("Get Transcript"):
        with gr.Row():
            with gr.Column():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                languages_input = gr.Textbox(
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                    value="en",
                )
                format_dropdown = gr.Dropdown(
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    label="Output Format",
                    value="Text",
                )
                translate_dropdown = gr.Dropdown(
                    choices=["", "en", "de", "es", "fr", "it"],
                    label="Translate To (optional)",
                    value="",
                )
                preserve_formatting = gr.Checkbox(
                    label="Preserve Formatting",
                    value=False,
                )
                submit_btn = gr.Button("Get Transcript")

            with gr.Column():
                output = gr.Textbox(label="Transcript", lines=20)

        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                list_btn = gr.Button("List Transcripts")

            with gr.Column():
                list_output = gr.Textbox(
                    label="Available Transcripts (JSON)",
                    lines=20,
                )

        list_btn.click(
            fn=list_available_transcripts,
            inputs=list_video_id,
            outputs=list_output,
        )

    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)

# share=True asks Gradio to also expose the app through a public share link.
demo.launch(
    share=True
)