Svngoku's picture
Rename main.py to app.py
d499035 verified
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter
import json
def _parse_language_codes(languages):
    """Turn a comma-separated string of language codes into a clean list.

    Whitespace around each code is removed and empty entries are dropped, so
    ``"en, de,"`` becomes ``["en", "de"]``. Falls back to ``["en"]`` when no
    usable code remains.
    """
    codes = [code.strip() for code in (languages or "").split(",")]
    return [code for code in codes if code] or ["en"]


def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order. An empty
            value defaults to ``en``.
        format_type: One of ``"Text"``, ``"JSON"``, ``"WebVTT"`` or ``"SRT"``.
        translate_to: Language code to translate the transcript into; a falsy
            value means no translation.
        preserve_formatting: Forwarded to the transcript API when fetching.

    Returns:
        The formatted transcript, or a human-readable error message. Errors
        are returned as strings rather than raised so the UI can show them.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    language_codes = _parse_language_codes(languages)

    try:
        # Resolve the formatter before any network call so an unsupported
        # format fails fast with a clear message.
        formatter_classes = {
            "Text": TextFormatter,
            "JSON": JSONFormatter,
            "WebVTT": WebVTTFormatter,
            "SRT": SRTFormatter,
        }
        formatter_class = formatter_classes.get(format_type)
        if formatter_class is None:
            return f"Error: Unsupported output format '{format_type}'"

        if translate_to:
            # Translation needs the Transcript object, so go through the
            # transcript list directly instead of fetching twice.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            base_transcript = transcript_list.find_transcript(language_codes)
            transcript = base_transcript.translate(translate_to).fetch(
                preserve_formatting=preserve_formatting
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=language_codes,
                preserve_formatting=preserve_formatting,
            )

        return formatter_class().format_transcript(transcript)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.

    Returns:
        A JSON array (indented by two spaces) with one object per transcript,
        holding its language, language code, whether it is auto-generated,
        whether it is translatable, and its translation languages. On failure
        a human-readable error message is returned instead.
    """
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": transcript.translation_languages,
            }
            for transcript in transcript_list
        ]
        # ensure_ascii=False keeps non-ASCII language names readable in the
        # output textbox instead of showing \uXXXX escapes.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio front end: one tab fetches a transcript, the other lists what is
# available for a video.
# NOTE(review): block nesting was reconstructed from a flattened source;
# confirm the placement of the Notes markdown against the deployed layout.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    # --- Tab 1: fetch and format a transcript ---
    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: request parameters.
            with gr.Column():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                languages_input = gr.Textbox(
                    value="en",
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                )
                format_dropdown = gr.Dropdown(
                    label="Output Format",
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    value="Text",
                )
                translate_dropdown = gr.Dropdown(
                    label="Translate To (optional)",
                    choices=["", "en", "de", "es", "fr", "it"],
                    value="",
                )
                preserve_formatting = gr.Checkbox(value=False, label="Preserve Formatting")
                submit_btn = gr.Button("Get Transcript")

            # Right column: the rendered transcript or an error message.
            with gr.Column():
                output = gr.Textbox(lines=20, label="Transcript")

        # Input order must match the positional parameters of get_transcript.
        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    # --- Tab 2: list the transcripts available for a video ---
    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                list_btn = gr.Button("List Transcripts")

            with gr.Column():
                list_output = gr.Textbox(lines=20, label="Available Transcripts (JSON)")

        list_btn.click(fn=list_available_transcripts, inputs=list_video_id, outputs=list_output)

    gr.Markdown("""
### Notes
- Enter a valid YouTube video ID (found in the URL)
- Specify languages as comma-separated codes (e.g., "en,de")
- Choose output format from available options
- Optional: Select a language to translate the transcript to
- Preserve formatting keeps HTML tags if present
""")

# Start the app; share=True asks Gradio for a public share link.
demo.launch(share=True)