File size: 4,943 Bytes
0acd025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter
import json

def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. "en, de"). Empty or missing input falls back to "en".
        format_type: One of "Text", "JSON", "WebVTT" or "SRT".
        translate_to: Language code to translate the transcript into;
            any falsy value skips translation.
        preserve_formatting: Forwarded to the transcript fetch call.

    Returns:
        The formatted transcript, or a human-readable error message.
        Failures are reported through the returned string rather than raised,
        because the result is shown directly in the UI output box.
    """
    # Reject a blank ID up front instead of sending a pointless request.
    video_id = str(video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        # Tolerate spaces and stray commas ("en, de,") in the language list.
        language_codes = [
            code.strip() for code in (languages or "").split(",") if code.strip()
        ]
        if not language_codes:
            language_codes = ["en"]

        # Resolve the formatter before any network access so that an
        # unsupported format fails fast with a clear message.
        formatter_map = {
            "Text": TextFormatter(),
            "JSON": JSONFormatter(),
            "WebVTT": WebVTTFormatter(),
            "SRT": SRTFormatter(),
        }
        formatter = formatter_map.get(format_type)
        if formatter is None:
            return f"Error: Unsupported output format '{format_type}'"

        # Select the transcript once and fetch it once. Translation is applied
        # before the fetch so preserve_formatting also covers translated text.
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        selected = transcript_list.find_transcript(language_codes)
        if translate_to:
            selected = selected.translate(translate_to)
        transcript = selected.fetch(preserve_formatting=preserve_formatting)

        return formatter.format_transcript(transcript)

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: surface anything else as text instead of crashing.
        return f"Unexpected error: {str(e)}"

def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.

    Returns:
        A pretty-printed JSON array with one object per transcript (language,
        code, generated/translatable flags, translation targets), or a
        human-readable error message if the lookup fails.
    """
    # Reject a blank ID up front instead of sending a pointless request.
    video_id = str(video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    def _as_plain(lang):
        # Plain dicts pass through unchanged. Entries exposed as objects with
        # `language` / `language_code` attributes are converted so json.dumps
        # can serialise them (NOTE(review): shape depends on the installed
        # youtube_transcript_api version - confirm).
        if hasattr(lang, "language_code"):
            return {
                "language": getattr(lang, "language", None),
                "language_code": lang.language_code,
            }
        return lang

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": [
                    _as_plain(lang) for lang in transcript.translation_languages
                ],
            }
            for transcript in transcript_list
        ]

        # ensure_ascii=False keeps non-Latin language names readable in the UI.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: surface anything else as text instead of crashing.
        return f"Error: {str(e)}"

# Create Gradio interface
# Two tabs share one Blocks app: "Get Transcript" calls get_transcript and
# "List Available Transcripts" calls list_available_transcripts. Components are
# placed in the order they are created inside each Row/Column context, so the
# statement order below defines the on-screen layout.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")
    
    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: request parameters.
            with gr.Column():
                video_id_input = gr.Textbox(label="YouTube Video ID", placeholder="e.g., dQw4w9WgXcQ")
                languages_input = gr.Textbox(
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                    value="en"
                )
                # Choices must match the keys of formatter_map in get_transcript.
                format_dropdown = gr.Dropdown(
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    label="Output Format",
                    value="Text"
                )
                # The empty choice means "do not translate" (falsy in get_transcript).
                translate_dropdown = gr.Dropdown(
                    choices=["", "en", "de", "es", "fr", "it"],
                    label="Translate To (optional)",
                    value=""
                )
                preserve_formatting = gr.Checkbox(
                    label="Preserve Formatting",
                    value=False
                )
                submit_btn = gr.Button("Get Transcript")
            
            # Right column: formatted transcript or error message.
            with gr.Column():
                output = gr.Textbox(label="Transcript", lines=20)
        
        # Input order must match get_transcript's positional parameters.
        submit_btn.click(
            fn=get_transcript,
            inputs=[video_id_input, languages_input, format_dropdown, translate_dropdown, preserve_formatting],
            outputs=output
        )
    
    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(label="YouTube Video ID", placeholder="e.g., dQw4w9WgXcQ")
                list_btn = gr.Button("List Transcripts")
            
            # Shows the JSON string (or error text) returned by the handler.
            with gr.Column():
                list_output = gr.Textbox(label="Available Transcripts (JSON)", lines=20)
        
        list_btn.click(
            fn=list_available_transcripts,
            inputs=list_video_id,
            outputs=list_output
        )
    
    # Usage notes rendered below both tabs.
    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)

# Start the server only when this file is executed as a script, so importing
# the module (e.g. to reuse the handlers) does not launch a web server.
if __name__ == "__main__":
    # share=True asks Gradio for a publicly reachable link in addition to
    # the local URL.
    demo.launch(
        share=True
    )