Svngoku committed on
Commit
0acd025
·
verified ·
1 Parent(s): 250e8a5

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +133 -0
main.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
3
+ from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter
4
+ import json
5
+
6
def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. ``"en,de"``). Whitespace around codes and empty entries
            are dropped; if nothing remains, ``["en"]`` is used.
        format_type: One of ``"Text"``, ``"JSON"``, ``"WebVTT"``, ``"SRT"``.
        translate_to: Language code to translate the transcript into, or a
            falsy value to return the transcript untranslated.
        preserve_formatting: Forwarded to the transcript API (the UI
            describes it as keeping HTML tags if present). Applied to both
            the plain and the translated fetch.

    Returns:
        The formatted transcript as a string, or a human-readable error
        message. Failures are reported through the return value because the
        result is displayed directly in a Gradio textbox.
    """
    # Reject blank IDs up front instead of sending a pointless request.
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    # Resolve the formatter before any network call so a bad format fails
    # fast; only the formatter that is actually needed gets instantiated.
    formatter_classes = {
        "Text": TextFormatter,
        "JSON": JSONFormatter,
        "WebVTT": WebVTTFormatter,
        "SRT": SRTFormatter,
    }
    formatter_class = formatter_classes.get(format_type)
    if formatter_class is None:
        supported = ", ".join(formatter_classes)
        return f"Error: Unsupported format '{format_type}' (choose from: {supported})"

    # "en, de" must become ["en", "de"], not ["en", " de"].
    if isinstance(languages, str):
        raw_codes = languages.split(",")
    else:
        raw_codes = list(languages or [])
    language_codes = [code.strip() for code in raw_codes if code and code.strip()]
    if not language_codes:
        language_codes = ["en"]

    try:
        if translate_to:
            # Fetch only the translated transcript; the untranslated one is
            # never used in this branch, so it is not downloaded.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            base_transcript = transcript_list.find_transcript(language_codes)
            transcript = base_transcript.translate(translate_to).fetch(
                preserve_formatting=preserve_formatting
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=language_codes,
                preserve_formatting=preserve_formatting,
            )

        return formatter_class().format_transcript(transcript)

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: show any other failure to the user rather than crash.
        return f"Unexpected error: {str(e)}"
46
+
47
def list_available_transcripts(video_id):
    """Describe every transcript available for a video as pretty-printed JSON.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.

    Returns:
        A JSON array (2-space indent) with one object per transcript holding
        its language, language code, whether it is auto-generated, whether
        it is translatable, and its translation languages; or a
        human-readable error message if the lookup fails.
    """
    # Reject blank IDs up front instead of sending a pointless request.
    video_id = (video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": transcript.translation_languages,
            }
            for transcript in transcript_list
        ]
        # ensure_ascii=False keeps non-ASCII language names readable in the
        # output textbox instead of rendering them as \uXXXX escapes.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: show any other failure to the user rather than crash.
        return f"Error: {str(e)}"
67
+
68
# Build the Gradio UI: one tab fetches a transcript, the other lists what is
# available for a video. Components are created in display order.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    # --- Tab 1: fetch, optionally translate, and format a transcript ---
    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: request parameters.
            with gr.Column():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                languages_input = gr.Textbox(label="Languages (comma-separated)", placeholder="e.g., en,de,es", value="en")
                format_dropdown = gr.Dropdown(choices=["Text", "JSON", "WebVTT", "SRT"], label="Output Format", value="Text")
                # The empty first choice means "do not translate".
                translate_dropdown = gr.Dropdown(choices=["", "en", "de", "es", "fr", "it"], label="Translate To (optional)", value="")
                preserve_formatting = gr.Checkbox(label="Preserve Formatting", value=False)
                submit_btn = gr.Button("Get Transcript")

            # Right column: result (transcript text or an error message).
            with gr.Column():
                output = gr.Textbox(
                    label="Transcript",
                    lines=20,
                )

        # The order of `inputs` must match get_transcript's parameter order.
        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    # --- Tab 2: list the transcripts a video offers ---
    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                list_btn = gr.Button("List Transcripts")

            with gr.Column():
                list_output = gr.Textbox(
                    label="Available Transcripts (JSON)",
                    lines=20,
                )

        list_btn.click(fn=list_available_transcripts, inputs=list_video_id, outputs=list_output)

    # Usage notes rendered below both tabs.
    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)
130
+
131
# Start the Gradio server; share=True asks Gradio for a public share link.
demo.launch(share=True)