"""Gradio app that summarizes the transcript of a YouTube video.

The transcript is fetched with ``youtube_transcript_api``, cut into
fixed-size character chunks, and each chunk is summarized with the
``sshleifer/distilbart-cnn-6-6`` model through a ``transformers`` pipeline.
"""

import re
from typing import List, Optional

import gradio as gr
import torch  # noqa: F401  (only needed by the local-model variant below)
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

# Use a pipeline as a high-level helper. Built once at module level so every
# request reuses the same summarizer object.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")

# Alternative kept for reference: build the pipeline from a local snapshot
# directory instead of the model name.
# model_path = ("../Models/models--sshleifer--distilbart-cnn-6-6/snapshots/d2fde4ca965ba893255479612e4b801aa6500029")
# text_summary = pipeline("summarization", model=model_path,
#                         torch_dtype=torch.bfloat16)

# "v=" or "/" immediately followed by the 11 characters of a video ID.
_VIDEO_ID_PATTERN = re.compile(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*")


def split_text_to_chunks(text: str, chunk_size: int = 1024) -> List[str]:
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per piece.

    Returns:
        The pieces in order; an empty list when *text* is empty.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently drop the text.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]


def summary(input: str) -> str:
    """Summarize *input* chunk by chunk and concatenate the partial summaries.

    The parameter name shadows the ``input`` builtin; it is kept so existing
    keyword callers continue to work.

    Args:
        input: The text to summarize.

    Returns:
        The summaries of all chunks joined without a separator; an empty
        string when *input* is empty.
    """
    # Chunks are already limited to the default chunk size, so each one is
    # passed to the pipeline as-is.
    return "".join(
        text_summary(chunk)[0]["summary_text"]
        for chunk in split_text_to_chunks(input)
    )


def get_video_id(url: str) -> Optional[str]:
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: The URL typed by the user.

    Returns:
        The video ID, or ``None`` when *url* is empty or contains no ID.
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None


def _fetch_transcript_entries(video_id: str) -> list:
    """Return the transcript of *video_id* as a list of dicts with a "text" key."""
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # youtube-transcript-api 1.x exposes an instance API; the static
        # get_transcript() helper is deprecated there and absent from later
        # releases.
        return YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    return YouTubeTranscriptApi.get_transcript(video_id)


def get_transcript(video_url: str) -> str:
    """Fetch the transcript of a YouTube video and return its summary.

    This is the Gradio callback, so it never raises: every failure is
    reported as a human-readable string shown in the output box.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summarized transcript, or an error message.
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "Invalid YouTube URL!"

    # Broad catches are deliberate: this is the UI boundary and the user
    # should see a message rather than a stack trace.
    try:
        entries = _fetch_transcript_entries(video_id)
        transcript_text = "\n".join(entry["text"] for entry in entries)
    except Exception as exc:
        return f"Error fetching transcript: {exc}"

    try:
        return summary(transcript_text)
    except Exception as exc:
        return f"Error summarizing transcript: {exc}"


demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="YouTube video URL", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="YouTube Video Summarizer",
    description="Paste a YouTube video URL to get a summary of its transcript.",
)

if __name__ == "__main__":
    # Close any interface left running by a previous run before launching.
    gr.close_all()
    demo.launch()