Spaces:

ClarityClips
/

clarityclips-youtube-transcript

Running

App Files Files Community

Svngoku committed 19 days ago

Commit

0acd025

verified ·

1 Parent(s): 250e8a5

Create main.py

Browse files

Files changed (1) hide show

main.py +133 -0

main.py ADDED Viewed

	@@ -0,0 +1,133 @@

import json
import os

import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter
def _parse_languages(languages):
    """Turn the language field into a clean list of language codes.

    Accepts a comma-separated string (the UI textbox value) or an iterable
    of codes. Whitespace around each code is stripped and empty entries are
    dropped, so ``"en, de,"`` yields ``["en", "de"]``. ``None``, an empty
    string, or input with no usable codes falls back to ``["en"]``.
    """
    if not languages:
        return ["en"]
    if isinstance(languages, str):
        languages = languages.split(",")
    codes = [str(code).strip() for code in languages]
    return [code for code in codes if code] or ["en"]


def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and render it in the requested format.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. ``"en,de"``). Empty or ``None`` means ``["en"]``.
        format_type: One of ``"Text"``, ``"JSON"``, ``"WebVTT"``, ``"SRT"``.
        translate_to: Language code to translate the transcript into, or an
            empty value to skip translation.
        preserve_formatting: Forwarded to the transcript fetch call.

    Returns:
        The formatted transcript, or a human-readable error message string.
        Failures are reported through the return value so the UI can show
        them in the output textbox.
    """
    # Reject a blank ID up front instead of spending a network round-trip on it.
    if not isinstance(video_id, str) or not video_id.strip():
        return "Error: Please provide a YouTube video ID"
    video_id = video_id.strip()

    try:
        formatter_classes = {
            "Text": TextFormatter,
            "JSON": JSONFormatter,
            "WebVTT": WebVTTFormatter,
            "SRT": SRTFormatter,
        }
        formatter_class = formatter_classes.get(format_type)
        if formatter_class is None:
            # Checked before fetching so a bad format fails fast and clearly.
            return f"Error: Unsupported output format '{format_type}'"

        # Resolve the transcript once; translation (if any) is applied before
        # the single fetch so the transcript is not downloaded twice and
        # preserve_formatting is honoured for translated output as well.
        transcript = YouTubeTranscriptApi.list_transcripts(video_id).find_transcript(
            _parse_languages(languages)
        )
        if translate_to:
            transcript = transcript.translate(translate_to)
        fetched = transcript.fetch(preserve_formatting=preserve_formatting)

        return formatter_class().format_transcript(fetched)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: surface any other failure as text rather than crashing.
        return f"Unexpected error: {str(e)}"
def _translation_language_to_dict(language):
    """Return a JSON-serializable dict for one translation-language entry.

    Entries that are already dicts are passed through unchanged. Any other
    object is read via its ``language`` / ``language_code`` attributes
    (newer library releases appear to return such objects instead of dicts;
    confirm against the installed youtube-transcript-api version).
    """
    if isinstance(language, dict):
        return language
    return {
        "language": getattr(language, "language", None),
        "language_code": getattr(language, "language_code", None),
    }


def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.

    Returns:
        A JSON array (2-space indent) with one object per transcript holding
        its language name, language code, whether it is auto-generated,
        whether it is translatable, and its translation languages. On
        failure, a human-readable error message string is returned instead.
    """
    # Reject a blank ID up front instead of spending a network round-trip on it.
    if not isinstance(video_id, str) or not video_id.strip():
        return "Error: Please provide a YouTube video ID"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id.strip())
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                "Translation Languages": [
                    _translation_language_to_dict(language)
                    for language in transcript.translation_languages
                ],
            }
            for transcript in transcript_list
        ]
        # ensure_ascii=False keeps non-Latin language names readable in the
        # textbox instead of rendering them as \uXXXX escapes.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: surface any other failure as text rather than crashing.
        return f"Error: {str(e)}"
# Build the Gradio interface: one tab fetches and formats a transcript, the
# other lists which transcripts exist for a video.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: request parameters.
            with gr.Column():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                languages_input = gr.Textbox(
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                    value="en",
                )
                format_dropdown = gr.Dropdown(
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    label="Output Format",
                    value="Text",
                )
                # The empty choice means "do not translate".
                translate_dropdown = gr.Dropdown(
                    choices=["", "en", "de", "es", "fr", "it"],
                    label="Translate To (optional)",
                    value="",
                )
                preserve_formatting = gr.Checkbox(
                    label="Preserve Formatting",
                    value=False,
                )
                submit_btn = gr.Button("Get Transcript")
            # Right column: formatted transcript or error message.
            with gr.Column():
                output = gr.Textbox(label="Transcript", lines=20)

        # Input order must match get_transcript's positional parameters.
        submit_btn.click(
            fn=get_transcript,
            inputs=[
                video_id_input,
                languages_input,
                format_dropdown,
                translate_dropdown,
                preserve_formatting,
            ],
            outputs=output,
        )

    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="e.g., dQw4w9WgXcQ",
                )
                list_btn = gr.Button("List Transcripts")
            with gr.Column():
                list_output = gr.Textbox(label="Available Transcripts (JSON)", lines=20)

        list_btn.click(
            fn=list_available_transcripts,
            inputs=list_video_id,
            outputs=list_output,
        )

    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)

# Launch only when run as a script, so importing this module (tests, tooling)
# builds `demo` without starting a server.
if __name__ == "__main__":
    # Hugging Face Spaces does not support share links (Gradio warns and
    # ignores the flag there), so request one only when running elsewhere.
    # SPACE_ID is an environment variable set inside a Space.
    demo.launch(
        share=not os.getenv("SPACE_ID")
    )