prashantsv's picture
Update app.py
820ca0a verified
raw
history blame
1.63 kB
from youtube_transcript_api import YouTubeTranscriptApi
import re
import torch
import gradio as gr
# Use a pipeline as a high-level helper
from transformers import pipeline
# Summarization pipeline shared by every call to summary(); it is created once
# here at module level rather than per request.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-6-6",
)
def split_text_to_chunks(text, chunk_size=1024):
    """Cut ``text`` into consecutive, non-overlapping character slices.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per slice.

    Returns:
        A list of slices in their original order. Every slice has
        ``chunk_size`` characters except possibly the last, which holds
        whatever remains. An empty ``text`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces
def summary(input):
    """Summarise ``input`` chunk by chunk and return the concatenated result.

    The text is divided with ``split_text_to_chunks`` (default chunk size),
    each chunk is passed to ``text_summary``, and the ``'summary_text'`` field
    of the first result for every chunk is appended, in order, with no
    separator.

    Args:
        input: The full text to summarise.

    Returns:
        The per-chunk summaries joined into one string; an empty string when
        there are no chunks.
    """
    pieces = split_text_to_chunks(input)
    # The extra [:1024] slice caps what is handed to the summarizer at 1024
    # characters per call, independent of how the chunks were produced.
    partials = [
        text_summary(piece[:1024])[0]['summary_text']
        for piece in pieces
    ]
    return ''.join(partials)
# A video ID is exactly 11 characters from [0-9A-Za-z_-], introduced by "v="
# or "/". The negative lookahead rejects the first 11 characters of a longer
# token (e.g. a channel ID or a long path segment) being mistaken for an ID.
_VIDEO_ID_PATTERN = re.compile(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])")


def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: The URL to inspect, e.g. a ``watch?v=<id>`` link or a link whose
            path ends in ``/<id>``.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string or
        contains no standalone 11-character ID after ``v=`` or ``/``.
    """
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
def get_transcript(video_url):
    """Fetch the transcript of a YouTube video and return a summary of it.

    Args:
        video_url: URL of the video; the video ID is extracted from it with
            ``get_video_id``.

    Returns:
        The summary produced by ``summary`` on success. Failures are never
        raised; they are returned as a message string instead:
        ``"Invalid YouTube URL!"`` when no video ID can be extracted,
        ``"Error fetching transcript: ..."`` when the transcript cannot be
        retrieved or read, and ``"Error summarizing transcript: ..."`` when
        summarization fails.
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "Invalid YouTube URL!"

    # Fetching and summarizing are guarded separately so the message shown to
    # the user names the step that actually failed.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = "\n".join(entry["text"] for entry in transcript)
    except Exception as e:
        return f"Error fetching transcript: {e}"

    try:
        return summary(transcript_text)
    except Exception as e:
        return f"Error summarizing transcript: {e}"
# Shut down any Gradio interfaces still running before starting this one.
gr.close_all()

# The handler only accepts a YouTube URL (anything else returns
# "Invalid YouTube URL!"), so the title, input label and description say so.
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="YouTube video URL", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="YouTube Video Summarizer",
    description="Paste a YouTube video URL to get a summary of its transcript.",
)
demo.launch()