"""Gradio app that fetches a YouTube video's transcript and summarizes it."""

import re
from typing import List, Optional

import gradio as gr
import torch  # noqa: F401 -- not referenced directly in this file; kept from the original import list
# Use a pipeline as a high-level helper
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")

# An ID is treated as exactly 11 characters from [0-9A-Za-z_-]. The match is
# anchored to known URL shapes (``?v=``/``&v=``, ``youtu.be/``, ``/embed/``,
# ``/shorts/``, ...) and must not be followed by another ID character, so an
# arbitrary long path segment or host name is no longer mistaken for an ID.
_VIDEO_ID_RE = re.compile(
    r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v|e|watch)/)"
    r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def split_text_to_chunks(text: str, chunk_size: int = 1024) -> List[str]:
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per piece; must be positive.

    Returns:
        The pieces in order; an empty list when *text* is empty.

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]


def summary(input: str) -> str:  # noqa: A002 -- parameter name kept for existing callers
    """Summarize *input* chunk by chunk and return the combined summary.

    The text is cut into fixed-size character chunks, each chunk is passed to
    the module-level ``text_summary`` pipeline, and the per-chunk summaries
    are joined with single spaces.

    Args:
        input: The full text to summarize.

    Returns:
        The concatenated chunk summaries; an empty string when there is
        nothing to summarize.
    """
    pieces = []
    for chunk in split_text_to_chunks(input):
        if not chunk.strip():
            # Nothing to summarize in a whitespace-only chunk.
            continue
        # Chunks are bounded in characters, not tokens; truncation guards
        # against a chunk that still tokenizes past the model's input limit.
        result = text_summary(chunk, truncation=True)
        pieces.append(result[0]["summary_text"].strip())
    return " ".join(pieces)


def get_video_id(url: str) -> Optional[str]:
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: A YouTube URL such as ``https://www.youtube.com/watch?v=<id>``
            or ``https://youtu.be/<id>``.

    Returns:
        The video ID, or ``None`` when no ID can be found in *url*.
    """
    if not url:
        return None
    match = _VIDEO_ID_RE.search(url.strip())
    return match.group(1) if match else None


def _fetch_transcript_text(video_id: str) -> str:
    """Return the transcript of *video_id* as newline-joined text."""
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # NOTE(review): newer youtube-transcript-api releases expose an
        # instance-level ``fetch`` that yields snippet objects with a ``text``
        # attribute -- confirm against the installed version.
        snippets = YouTubeTranscriptApi().fetch(video_id)
        return "\n".join(snippet.text for snippet in snippets)
    entries = YouTubeTranscriptApi.get_transcript(video_id)
    return "\n".join(entry["text"] for entry in entries)


def get_transcript(video_url: str) -> str:
    """Fetch the transcript of a YouTube video and return its summary.

    Errors are reported as strings rather than raised, because the return
    value is what the Gradio output textbox displays.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summarized transcript, or a human-readable error message when the
        URL is invalid, the transcript cannot be fetched, or summarization
        fails.
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "Invalid YouTube URL!"

    try:
        transcript_text = _fetch_transcript_text(video_id)
    except Exception as e:  # UI boundary: surface the failure to the user
        return f"Error fetching transcript: {e}"

    try:
        return summary(transcript_text)
    except Exception as e:  # UI boundary: surface the failure to the user
        return f"Error summarizing transcript: {e}"


gr.close_all()

# NOTE(review): the input label and description talk about "text", but the
# handler expects a YouTube URL -- consider rewording the UI strings.
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="Input text to summarize", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="Youtube Video Summarizer",
    description="This application will be used to summarise the text",
)
demo.launch()