|
import gradio as gr |
|
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound |
|
from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter |
|
import json |
|
|
|
def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and return it rendered in the chosen format.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. "en,de"). Blank entries are dropped; when nothing usable
            remains, English ("en") is used.
        format_type: One of "Text", "JSON", "WebVTT" or "SRT".
        translate_to: Optional language code to translate the transcript
            into; a falsy value means "no translation".
        preserve_formatting: Passed through to the transcript fetch for both
            the plain and the translated path.

    Returns:
        The formatted transcript, or a human-readable error message string.
        Errors are returned rather than raised so the UI can display them.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        # Fail fast instead of sending a request that cannot succeed.
        return "Error: Please provide a YouTube video ID"

    # Tolerate "en, de" and stray commas: strip each code and drop empties.
    language_codes = [
        code.strip() for code in (languages or "").split(",") if code.strip()
    ]
    if not language_codes:
        language_codes = ["en"]

    try:
        formatter_classes = {
            "Text": TextFormatter,
            "JSON": JSONFormatter,
            "WebVTT": WebVTTFormatter,
            "SRT": SRTFormatter,
        }
        formatter_class = formatter_classes.get(format_type)
        if formatter_class is None:
            return f"Error: Unsupported output format {format_type!r}"

        if translate_to:
            # Only the translated transcript is needed, so skip the plain
            # fetch entirely and go straight through the transcript list.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            base_transcript = transcript_list.find_transcript(language_codes)
            transcript = base_transcript.translate(translate_to).fetch(
                preserve_formatting=preserve_formatting
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=language_codes,
                preserve_formatting=preserve_formatting,
            )

        return formatter_class().format_transcript(transcript)

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: surface anything else as text instead of crashing.
        return f"Unexpected error: {e}"
|
|
|
def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.

    Returns:
        A JSON array (indented by 2) with one object per transcript holding
        "Language", "Code", "Is Generated", "Is Translatable" and
        "Translation Languages"; or a human-readable error message string.
        Errors are returned rather than raised so the UI can display them.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        # Fail fast instead of sending a request that cannot succeed.
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": transcript.translation_languages,
            }
            for transcript in transcript_list
        ]
        return json.dumps(transcripts_info, indent=2)

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: surface anything else as text instead of crashing.
        return f"Error: {e}"
|
|
|
|
|
# Two-tab UI: one tab fetches a transcript, the other lists what is available.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    # --- Tab 1: fetch (and optionally translate) a transcript ---
    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: request parameters and the submit button.
            with gr.Column():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                languages_input = gr.Textbox(
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                    value="en",
                )
                format_dropdown = gr.Dropdown(
                    label="Output Format",
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    value="Text",
                )
                # The empty choice means "do not translate".
                translate_dropdown = gr.Dropdown(
                    label="Translate To (optional)",
                    choices=["", "en", "de", "es", "fr", "it"],
                    value="",
                )
                preserve_formatting = gr.Checkbox(label="Preserve Formatting", value=False)
                submit_btn = gr.Button("Get Transcript")

            # Right column: the formatted transcript or an error message.
            with gr.Column():
                output = gr.Textbox(label="Transcript", lines=20)

        # Input order must match get_transcript's positional parameters.
        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    # --- Tab 2: list the transcripts available for a video ---
    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                list_btn = gr.Button("List Transcripts")

            with gr.Column():
                list_output = gr.Textbox(label="Available Transcripts (JSON)", lines=20)

        list_btn.click(fn=list_available_transcripts, inputs=list_video_id, outputs=list_output)

    # Usage notes shown below both tabs.
    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)
|
|
|
# Start the app; share=True additionally requests a public Gradio share link.
demo.launch(share=True)