Spaces:

traversaal-ai
/

youtube-transcripts

Sleeping

App Files Files Community

traversaal-ai committed on Jul 2

Commit

795a986

verified ·

1 Parent(s): 730ce43

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import gradio as gr
+from youtube_transcript_api import YouTubeTranscriptApi
def get_youtube_transcript(video_id: str) -> str:
    """Fetch a YouTube video's transcript and format it for display.

    Args:
        video_id: The YouTube video ID (e.g., 'dQw4w9WgXcQ'). Leading and
            trailing whitespace is ignored.

    Returns:
        A string containing the full transcript text followed by one line
        per segment (start time, duration, text). If no ID is supplied, a
        prompt asking for one is returned; if the transcript cannot be
        retrieved, a human-readable error message is returned instead of
        raising.
    """
    # Pasted IDs often carry stray whitespace; treat blank input as missing.
    video_id = (video_id or "").strip()
    if not video_id:
        return "Please enter a YouTube video ID."
    try:
        # NOTE(review): newer youtube-transcript-api releases appear to drop
        # the static get_transcript() in favour of an instance fetch() API.
        # Fall back to it only when the static method is absent; confirm
        # against the version pinned for this app.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        # Whole transcript as a single space-separated paragraph.
        full_transcript_text = " ".join(item["text"] for item in transcript_list)

        # One line per segment. The join happens outside any f-string
        # replacement field so a real newline is used (a backslash inside the
        # field is a syntax error before Python 3.12, and an escaped "\\n"
        # there would emit a literal backslash-n instead of a line break).
        detailed_segments = "\n".join(
            f"Start: {segment['start']:.2f}s, "
            f"Duration: {segment['duration']:.2f}s, "
            f"Text: {segment['text']}"
            for segment in transcript_list
        )

        # Combine results into a single string for the Gradio output box.
        return (
            "Full Transcript:\n"
            f"{full_transcript_text}\n\n"
            "Detailed Transcript Segments:\n"
            f"{detailed_segments}"
        )
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing the app.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )
# Build the input and output widgets first, then wire them into the interface.
_video_id_box = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)
_transcript_box = gr.Textbox(
    label="Transcript Output",
    lines=20,           # tall enough to show long transcripts comfortably
    interactive=False,  # output is read-only
)

# Single-function UI: video ID in, formatted transcript (or error text) out.
iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=_video_id_box,
    outputs=_transcript_box,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()