# File size: 4,943 Bytes (revision 0acd025)
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter
import json
def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. "en, de"). Blank or missing input falls back to "en".
        format_type: One of "Text", "JSON", "WebVTT" or "SRT".
        translate_to: Language code to translate the transcript into, or a
            falsy value to return the transcript untranslated.
        preserve_formatting: Forwarded to the transcript API's
            ``preserve_formatting`` option (keeps HTML tags if present).

    Returns:
        The formatted transcript, or a human-readable error message. Errors
        are returned rather than raised so the UI can display them.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    # Tolerate spaces and stray commas ("en, de,"): without stripping, codes
    # such as " de" or "" would be passed on and never match a transcript.
    language_codes = [
        code.strip() for code in (languages or "").split(",") if code.strip()
    ]
    if not language_codes:
        language_codes = ["en"]

    try:
        formatter_classes = {
            "Text": TextFormatter,
            "JSON": JSONFormatter,
            "WebVTT": WebVTTFormatter,
            "SRT": SRTFormatter,
        }
        # Reject an unknown format before any network request is made.
        formatter_class = formatter_classes.get(format_type)
        if formatter_class is None:
            return f"Error: Unsupported output format '{format_type}'"

        if translate_to:
            # Fetch only the translated transcript; fetching the untranslated
            # one first would be a wasted request. The formatting option is
            # forwarded so it also applies to translated output.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            base_transcript = transcript_list.find_transcript(language_codes)
            transcript = base_transcript.translate(translate_to).fetch(
                preserve_formatting=preserve_formatting
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=language_codes,
                preserve_formatting=preserve_formatting,
            )

        return formatter_class().format_transcript(transcript)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: report any other failure as text instead of crashing.
        return f"Unexpected error: {str(e)}"
def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.

    Returns:
        A pretty-printed JSON array with one object per transcript (language
        name, language code, whether it is auto-generated, whether it can be
        translated, and its translation languages), or a human-readable error
        message if the listing could not be retrieved.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        # NOTE(review): json.dumps needs the translation_languages entries to
        # be JSON-serializable -- confirm for the installed library version.
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": transcript.translation_languages,
            }
            for transcript in transcript_list
        ]
        # ensure_ascii=False keeps non-ASCII language names readable in the
        # output textbox instead of showing \uXXXX escapes.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: report any other failure as text instead of crashing.
        return f"Error: {str(e)}"
# Build the two-tab Gradio UI. Components are created in the same order as
# before, so the rendered layout is unchanged.
# NOTE(review): the nesting below was reconstructed from statement order
# because the original indentation was unavailable -- confirm the layout.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    # Page header.
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    # Tab 1: inputs on the left, formatted transcript on the right.
    with gr.Tab("Get Transcript"):
        with gr.Row():
            with gr.Column():
                video_id_input = gr.Textbox(
                    placeholder="e.g., dQw4w9WgXcQ",
                    label="YouTube Video ID",
                )
                languages_input = gr.Textbox(
                    value="en",
                    placeholder="e.g., en,de,es",
                    label="Languages (comma-separated)",
                )
                format_dropdown = gr.Dropdown(
                    value="Text",
                    label="Output Format",
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                )
                # The empty choice means "do not translate".
                translate_dropdown = gr.Dropdown(
                    value="",
                    label="Translate To (optional)",
                    choices=["", "en", "de", "es", "fr", "it"],
                )
                preserve_formatting = gr.Checkbox(value=False, label="Preserve Formatting")
                submit_btn = gr.Button("Get Transcript")

            with gr.Column():
                output = gr.Textbox(lines=20, label="Transcript")

        # Input order must match the positional parameters of get_transcript.
        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    # Tab 2: list the transcripts that exist for a video.
    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    placeholder="e.g., dQw4w9WgXcQ",
                    label="YouTube Video ID",
                )
                list_btn = gr.Button("List Transcripts")

            with gr.Column():
                list_output = gr.Textbox(lines=20, label="Available Transcripts (JSON)")

        list_btn.click(
            fn=list_available_transcripts,
            inputs=list_video_id,
            outputs=list_output,
        )

    # Usage notes shown below the tabs.
    gr.Markdown("""
### Notes
- Enter a valid YouTube video ID (found in the URL)
- Specify languages as comma-separated codes (e.g., "en,de")
- Choose output format from available options
- Optional: Select a language to translate the transcript to
- Preserve formatting keeps HTML tags if present
""")
# Start the app; share=True asks Gradio to also create a public share link.
demo.launch(
    share=True,
)