Spaces:

Sayiqa7
/

youtbe_content_summ

Running

App Files Community

Sayiqa7 committed on Dec 29, 2024

Commit

c63913d

verified ·

1 Parent(s): d7aeb92

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -132

app.py CHANGED Viewed

@@ -66,148 +66,83 @@ else:
 # if __name__ == "__main__":
 #     interface.launch()
-# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-# import gradio as gr
-# from youtube_transcript_api import YouTubeTranscriptApi
-# from urllib.parse import urlparse, parse_qs
-# def extract_video_id(url):
-#     """
-#     Extract video ID from YouTube URL
-#     """
-#     parsed_url = urlparse(url)
-#     if parsed_url.hostname == 'youtu.be':
-#         return parsed_url.path[1:]
-#     if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
-#         if parsed_url.path == '/watch':
-#             return parse_qs(parsed_url.query)['v'][0]
-#     return None
-# def get_transcript(video_id):
-#     """
-#     Get transcript from YouTube video
-#     """
-#     try:
-#         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-#         transcript = ' '.join([t['text'] for t in transcript_list])
-#         return transcript
-#     except Exception as e:
-#         return f"Error getting transcript: {str(e)}"
-# def summarize_youtube_video(video_url):
-#     """
-#     Main function to summarize YouTube video content
-#     """
-#     try:
-#         # Extract video ID
-#         video_id = extract_video_id(video_url)
-#         if not video_id:
-#             return "Invalid YouTube URL"
-#         # Get transcript
-#         transcript = get_transcript(video_id)
-#         if transcript.startswith("Error"):
-#             return transcript
-#         # Load model and tokenizer
-#         tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
-#         model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
-#         # Create summarization pipeline
-#         summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
-#         # Generate summary
-#         summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
-#         return summary[0]['summary_text']
-#     except Exception as e:
-#         return f"An error occurred: {str(e)}"
-# # Create Gradio interface
-# interface = gr.Interface(
-#     fn=summarize_youtube_video,
-#     inputs=gr.Textbox(
-#         lines=1,
-#         placeholder="Enter YouTube video URL here..."
-#     ),
-#     outputs=gr.Textbox(
-#         lines=5,
-#         label="Video Summary"
-#     ),
-#     title="YouTube Video Summarizer",
-#     description="Enter a YouTube video URL to generate a concise summary of its content.",
-# )
-# # Launch the interface
-# if __name__ == "__main__":
-#     interface.launch()
-##########################
-from pytube import YouTube
-from transformers import pipeline
 import gradio as gr
-import os
-# Define a function to download the audio from YouTube
-def download_audio_from_youtube(video_url):
     try:
-        # Initialize YouTube object with video URL
-        yt = YouTube(video_url)
-        # Filter the stream to get the first available audio-only stream
-        stream = yt.streams.filter(only_audio=True).first()
-        if stream is None:
-            raise Exception("No audio stream available for this video.")
-        # Download audio stream
-        audio_filename = "audio.mp4"
-        stream.download(filename=audio_filename)
-        return audio_filename
     except Exception as e:
-        print(f"Error downloading video: {e}")
-        return None
-# Load the speech-to-text pipeline (Whisper)
-transcriber = pipeline(model="openai/whisper-large", task="automatic-speech-recognition")
-# Function to transcribe the audio file
-def transcribe_audio(audio_file_path):
     try:
-        # Use Whisper model for transcription
-        result = transcriber(audio_file_path)
-        return result['text']
-    except Exception as e:
-        print(f"Error during transcription: {e}")
-        return "Error transcribing the audio."
-# Function to handle Gradio interface input and output
-def process_youtube_url(video_url):
-    # Step 1: Download audio from the given YouTube video URL
-    audio_file = download_audio_from_youtube(video_url)
-    if audio_file:
-        # Step 2: Transcribe the audio
-        transcription = transcribe_audio(audio_file)
-        # Clean up the downloaded audio file after transcription
-        os.remove(audio_file)
-        return transcription
-    else:
-        return "Failed to download or find audio for the provided YouTube video."
-# Create a Gradio interface to upload a YouTube URL and display transcription
 interface = gr.Interface(
-    fn=process_youtube_url,
-    inputs=gr.Textbox(label="Enter YouTube Video URL"),
-    outputs="text",
-    live=True
 )
-# Launch the Gradio interface
-interface.launch()

 # if __name__ == "__main__":
 #     interface.launch()
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
+from youtube_transcript_api import YouTubeTranscriptApi
+from urllib.parse import urlparse, parse_qs
+def extract_video_id(url):
+    """
+    Extract video ID from YouTube URL
+    """
+    parsed_url = urlparse(url)
+    if parsed_url.hostname == 'youtu.be':
+        return parsed_url.path[1:]
+    if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
+        if parsed_url.path == '/watch':
+            return parse_qs(parsed_url.query)['v'][0]
+    return None
+def get_transcript(video_id):
+    """
+    Get transcript from YouTube video
+    """
     try:
+        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+        transcript = ' '.join([t['text'] for t in transcript_list])
+        return transcript
     except Exception as e:
+        return f"Error getting transcript: {str(e)}"
+def summarize_youtube_video(video_url):
+    """
+    Main function to summarize YouTube video content
+    """
     try:
+        # Extract video ID
+        video_id = extract_video_id(video_url)
+        if not video_id:
+            return "Invalid YouTube URL"
+        # Get transcript
+        transcript = get_transcript(video_id)
+        if transcript.startswith("Error"):
+            return transcript
+        # Load model and tokenizer
+        tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
+        model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
+        # Create summarization pipeline
+        summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
+        # Generate summary
+        summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
+        return summary[0]['summary_text']
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+# Create Gradio interface
 interface = gr.Interface(
+    fn=summarize_youtube_video,
+    inputs=gr.Textbox(
+        lines=1,
+        placeholder="Enter YouTube video URL here..."
+    ),
+    outputs=gr.Textbox(
+        lines=5,
+        label="Video Summary"
+    ),
+    title="YouTube Video Summarizer",
+    description="Enter a YouTube video URL to generate a concise summary of its content.",
 )
+# Launch the interface
+if __name__ == "__main__":
+    interface.launch()
+##########################