Spaces:

Sayiqa7
/

youtbe_content_summ

Running

App Files Community

Sayiqa7 committed on Dec 29, 2024

Commit

5964686

verified ·

1 Parent(s): c63913d

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -67

app.py CHANGED Viewed

@@ -66,83 +66,148 @@ else:
 # if __name__ == "__main__":
 #     interface.launch()
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
-from youtube_transcript_api import YouTubeTranscriptApi
-from urllib.parse import urlparse, parse_qs
-def extract_video_id(url):
-    """
-    Extract video ID from YouTube URL
-    """
-    parsed_url = urlparse(url)
-    if parsed_url.hostname == 'youtu.be':
-        return parsed_url.path[1:]
-    if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
-        if parsed_url.path == '/watch':
-            return parse_qs(parsed_url.query)['v'][0]
-    return None
-def get_transcript(video_id):
-    """
-    Get transcript from YouTube video
-    """
     try:
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        transcript = ' '.join([t['text'] for t in transcript_list])
-        return transcript
     except Exception as e:
-        return f"Error getting transcript: {str(e)}"
-def summarize_youtube_video(video_url):
-    """
-    Main function to summarize YouTube video content
-    """
     try:
-        # Extract video ID
-        video_id = extract_video_id(video_url)
-        if not video_id:
-            return "Invalid YouTube URL"
-        # Get transcript
-        transcript = get_transcript(video_id)
-        if transcript.startswith("Error"):
-            return transcript
-        # Load model and tokenizer
-        tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
-        model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
-        # Create summarization pipeline
-        summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
-        # Generate summary
-        summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
-        return summary[0]['summary_text']
-    except Exception as e:
-        return f"An error occurred: {str(e)}"
-# Create Gradio interface
 interface = gr.Interface(
-    fn=summarize_youtube_video,
-    inputs=gr.Textbox(
-        lines=1,
-        placeholder="Enter YouTube video URL here..."
-    ),
-    outputs=gr.Textbox(
-        lines=5,
-        label="Video Summary"
-    ),
-    title="YouTube Video Summarizer",
-    description="Enter a YouTube video URL to generate a concise summary of its content.",
 )
-# Launch the interface
-if __name__ == "__main__":
-    interface.launch()
-##########################

 # if __name__ == "__main__":
 #     interface.launch()
+# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+# import gradio as gr
+# from youtube_transcript_api import YouTubeTranscriptApi
+# from urllib.parse import urlparse, parse_qs
+# def extract_video_id(url):
+#     """
+#     Extract video ID from YouTube URL
+#     """
+#     parsed_url = urlparse(url)
+#     if parsed_url.hostname == 'youtu.be':
+#         return parsed_url.path[1:]
+#     if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
+#         if parsed_url.path == '/watch':
+#             return parse_qs(parsed_url.query)['v'][0]
+#     return None
+# def get_transcript(video_id):
+#     """
+#     Get transcript from YouTube video
+#     """
+#     try:
+#         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+#         transcript = ' '.join([t['text'] for t in transcript_list])
+#         return transcript
+#     except Exception as e:
+#         return f"Error getting transcript: {str(e)}"
+# def summarize_youtube_video(video_url):
+#     """
+#     Main function to summarize YouTube video content
+#     """
+#     try:
+#         # Extract video ID
+#         video_id = extract_video_id(video_url)
+#         if not video_id:
+#             return "Invalid YouTube URL"
+#         # Get transcript
+#         transcript = get_transcript(video_id)
+#         if transcript.startswith("Error"):
+#             return transcript
+#         # Load model and tokenizer
+#         tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
+#         model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
+#         # Create summarization pipeline
+#         summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
+#         # Generate summary
+#         summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
+#         return summary[0]['summary_text']
+#     except Exception as e:
+#         return f"An error occurred: {str(e)}"
+# # Create Gradio interface
+# interface = gr.Interface(
+#     fn=summarize_youtube_video,
+#     inputs=gr.Textbox(
+#         lines=1,
+#         placeholder="Enter YouTube video URL here..."
+#     ),
+#     outputs=gr.Textbox(
+#         lines=5,
+#         label="Video Summary"
+#     ),
+#     title="YouTube Video Summarizer",
+#     description="Enter a YouTube video URL to generate a concise summary of its content.",
+# )
+# # Launch the interface
+# if __name__ == "__main__":
+#     interface.launch()
+##########################
+from pytube import YouTube
+from transformers import pipeline
 import gradio as gr
+import os
+# Define a function to download the audio from YouTube
+def download_audio_from_youtube(video_url):
     try:
+        # Initialize YouTube object with video URL
+        yt = YouTube(video_url)
+        # Filter the stream to get the first available audio-only stream
+        stream = yt.streams.filter(only_audio=True).first()
+        if stream is None:
+            raise Exception("No audio stream available for this video.")
+        # Download audio stream
+        audio_filename = "audio.mp4"
+        stream.download(filename=audio_filename)
+        return audio_filename
     except Exception as e:
+        print(f"Error downloading video: {e}")
+        return None
+# Load the speech-to-text pipeline (Whisper)
+transcriber = pipeline(model="openai/whisper-large", task="automatic-speech-recognition")
+# Function to transcribe the audio file
+def transcribe_audio(audio_file_path):
     try:
+        # Use Whisper model for transcription
+        result = transcriber(audio_file_path)
+        return result['text']
+    except Exception as e:
+        print(f"Error during transcription: {e}")
+        return "Error transcribing the audio."
+# Function to handle Gradio interface input and output
+def process_youtube_url(video_url):
+    # Step 1: Download audio from the given YouTube video URL
+    audio_file = download_audio_from_youtube(video_url)
+    if audio_file:
+        # Step 2: Transcribe the audio
+        transcription = transcribe_audio(audio_file)
+        # Clean up the downloaded audio file after transcription
+        os.remove(audio_file)
+        return transcription
+    else:
+        return "Failed to download or find audio for the provided YouTube video."
+# Create a Gradio interface that accepts a YouTube URL and displays its transcription
 interface = gr.Interface(
+    fn=process_youtube_url,
+    inputs=gr.Textbox(label="Enter YouTube Video URL"),
+    outputs="text",
+    live=True
 )
+# Launch the Gradio interface
+interface.launch()