"""Gradio app that summarizes the transcript of a YouTube video.

The transcript is fetched with ``youtube_transcript_api`` and condensed with
the ``sshleifer/distilbart-cnn-6-6`` summarization model.
"""

import re

import gradio as gr
import torch
# Use a pipeline as a high-level helper
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound, VideoUnavailable

# Alternative: load the model from a local snapshot instead of by model id.
# model_path = ("../Models/models--sshleifer--distilbart-cnn-6-6/snapshots"
#               "/d2fde4ca965ba893255479612e4b801aa6500029")
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.bfloat16)

# Sample input for trying `summary` by hand:
# text = '''Elon Reeve Musk (/ˈiːlɒn mʌsk/; born June 28, 1971) is a businessman
# and political figure known for his key roles in the automotive company Tesla,
# Inc. and the space company SpaceX. He is also known for his ownership of
# X Corp. (the company that operates the social media platform X, formerly
# Twitter), and his role in the founding of the Boring Company, xAI, Neuralink,
# and OpenAI. Musk is the wealthiest individual in the world; as of January
# 2025, Forbes estimates his net worth to be US$427 billion.'''

# Built once at import time and reused by every request.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-6-6",
    torch_dtype=torch.bfloat16,
)

# A video ID is exactly 11 characters from [0-9A-Za-z_-].  Anchoring on the
# known URL forms and rejecting a longer run (negative lookahead) keeps us
# from returning the first 11 characters of an unrelated path segment or of
# the host name.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
    r"([0-9A-Za-z_-]{11})"
    r"(?![0-9A-Za-z_-])"
)


def summary(input):
    """Return a summary of *input* produced by the ``text_summary`` pipeline.

    The parameter keeps its original name (which shadows the ``input``
    builtin) so existing keyword callers continue to work.

    Args:
        input: Text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    # truncation=True clips over-long text to the model's maximum input
    # length; without it a long transcript makes the model call fail.  Only
    # the leading part of a very long text is therefore summarized.
    output = text_summary(input, truncation=True)
    return output[0]["summary_text"]


def get_youtube_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognized forms: ``watch?v=<id>`` (``v`` anywhere in the query string),
    ``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and
    ``/v/<id>``.

    Args:
        url: The URL to inspect.

    Returns:
        The video ID.

    Raises:
        ValueError: If no video ID is found in *url*.
    """
    match = _VIDEO_ID_PATTERN.search(url)
    if match is None:
        raise ValueError("Invalid YouTube URL.")
    return match.group(1)


def get_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Despite the name, the returned text is the *summary* of the transcript.
    Failures are reported as a message string rather than raised, because
    the return value is displayed directly in the Gradio output box.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text, or a human-readable error message.
    """
    try:
        video_id = get_youtube_video_id(video_url)
        print(video_id)
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Combine the transcript text into a single string
        full_transcript = "\n".join(entry["text"] for entry in transcript)
        if not full_transcript.strip():
            # Nothing to summarize; do not feed an empty string to the model.
            return "Transcript not available for this video."
        return summary(full_transcript)
    except NoTranscriptFound:
        return "Transcript not available for this video."
    except VideoUnavailable:
        return "The video is unavailable or private."
    except Exception as e:
        # UI boundary: show any other failure (including an invalid URL)
        # to the user instead of crashing the request.
        return f"An error occurred: {e}"


if __name__ == "__main__":
    gr.close_all()
    # Command-line alternative to the web UI:
    # video_url = input("Enter the YouTube video URL: ")
    # transcript = get_transcript(video_url)
    # print("\n--- SUMMARY Transcript ---\n")
    # print(transcript)
    demo = gr.Interface(
        fn=get_transcript,
        inputs=[gr.Textbox(label="Input YouTube Url to summarize", lines=1)],
        outputs=[gr.Textbox(label="Summarized text", lines=4)],
        title="@GenAILearniverse Project 2: YouTube Script Summarizer",
        description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO SCRIPT.",
    )
    demo.launch()