prashantsv's picture
Update app.py
0ebee50 verified
raw
history blame
2.09 kB
from youtube_transcript_api import YouTubeTranscriptApi
import re
import torch
import gradio as gr
# Use a pipeline as a high-level helper
from transformers import pipeline
# Summarization pipeline shared by the whole module: it is built once, when the
# module is loaded, from the "sshleifer/distilbart-cnn-6-6" checkpoint and is
# called by summary() for every chunk of text.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
# model_path = ("../Models/models--sshleifer--distilbart-cnn-6-6/snapshots/d2fde4ca965ba893255479612e4b801aa6500029")
# text_summary = pipeline("summarization", model=model_path,
# torch_dtype=torch.bfloat16)
def split_text_to_chunks(text, chunk_size=1024):
    """Cut ``text`` into consecutive slices of at most ``chunk_size`` items.

    The split is purely positional, so a slice boundary may fall in the
    middle of a word. Every slice holds exactly ``chunk_size`` characters
    except possibly the last one; an empty ``text`` produces an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces
def summary(input):
    """Summarize ``input`` piece by piece and return the joined result.

    The text is cut up by ``split_text_to_chunks`` using its default chunk
    size. Each piece (capped at 1024 characters) is handed to the
    module-level ``text_summary`` pipeline, and the ``summary_text`` of the
    first result for every piece is concatenated, in order, with no
    separator. An empty ``input`` gives an empty string.
    """
    # NOTE: the parameter name shadows the builtin ``input``; it is kept
    # because it is part of this function's signature.
    return "".join(
        text_summary(piece[:1024])[0]['summary_text']
        for piece in split_text_to_chunks(input)
    )
def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is looked for right after ``v=`` (``watch?v=<id>``) or right after
    a ``/`` (``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``).

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string or
        contains no candidate ID.
    """
    if not isinstance(url, str):
        return None
    # The negative lookahead requires the candidate to be *exactly* 11 ID
    # characters long. Without it, the first 11 characters of any longer token
    # after a "/" (e.g. "subscriptions" or the host "youtube-nocookie") would
    # be returned as if they were a video ID.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None
def _fetch_transcript_text(video_id):
    """Return the transcript of ``video_id`` as newline-joined text."""
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        # Legacy static API: returns a list of dicts with a "text" key.
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return "\n".join(entry["text"] for entry in entries)
    # NOTE(review): newer youtube-transcript-api releases dropped the static
    # method in favour of an instance ``fetch()`` whose snippets expose
    # ``.text`` -- confirm against the version pinned for this app.
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return "\n".join(snippet.text for snippet in fetched)


def get_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary produced by ``summary()`` on success; otherwise a
        human-readable message: ``"Invalid YouTube URL!"`` when no video ID
        can be extracted, or an ``"Error ..."`` string naming the step that
        failed. Errors are returned rather than raised so the message shows
        up in the UI output box.
    """
    # Guard against None/empty input before handing it to the URL parser.
    video_id = get_video_id(video_url) if video_url else None
    if not video_id:
        return "Invalid YouTube URL!"

    try:
        transcript_text = _fetch_transcript_text(video_id)
    except Exception as e:
        return f"Error fetching transcript: {e}"

    if not transcript_text.strip():
        # Nothing to summarize; say so instead of returning a blank result.
        return "Error fetching transcript: the transcript is empty"

    # Kept separate from the fetch above so that a model failure is not
    # misreported as a transcript download problem.
    try:
        return summary(transcript_text)
    except Exception as e:
        return f"Error summarizing transcript: {e}"
# youtube_url = input("Enter YouTube URL: ")
# transcript = get_transcript(youtube_url)
# output = summary(transcript)
# print("\n--- Video Transcript ---\n")
# print(output)
# Shut down any Gradio apps still open before starting a new one.
gr.close_all()

# The bound function expects a YouTube URL (not free text), so the labels,
# title and description tell the user exactly that.
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="YouTube video URL", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="YouTube Video Summarizer",
    description="Paste a YouTube video URL to get a summary of its transcript.",
)
demo.launch()