Spaces:
Sleeping
Sleeping
File size: 1,819 Bytes
d4317f5 945679c d4317f5 cdf3c9a d4317f5 e8f1cde 483b0b1 cdf3c9a d4317f5 ac9ff43 d4317f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from transformers import pipeline
import re
import gradio as gr
# Summarization pipeline, built once at module import and used by summary().
pipe = pipeline(task="summarization", model="Falconsai/text_summarization")
# Compiled once at import time. Recognised URL shapes:
#   youtube.com/<segment>/<...>/<id>
#   youtube.com/v/<id>, /e/<id>, /embed/<id>, /shorts/<id>, /live/<id>
#   youtube.com/...?v=<id> or ...&v=<id>
#   youtu.be/<id>
# The scheme and the "www." prefix are optional.
_YOUTUBE_ID_PATTERN = re.compile(
    r"(?:https?:\/\/)?(?:www\.)?"
    r"(?:youtube\.com\/"
    r"(?:[^\/\n\s]+\/\S+\/"
    r"|(?:v|e(?:mbed)?|shorts|live)\/"
    r"|\S*?[?&]v=)"
    r"|youtu\.be\/)"
    r"([a-zA-Z0-9_-]{11})"
)


def extract_youtube_id(url):
    """
    Extracts the YouTube video ID from a given URL.

    The URL is searched (not fully matched), so surrounding text or
    whitespace around the link does not prevent a match.

    Args:
        url (str): The YouTube video URL.

    Returns:
        str: The extracted 11-character video ID, or None if no match is
        found or if ``url`` is empty or not a string.
    """
    # Guard against None / non-string input, which would otherwise make
    # the regex search raise TypeError instead of returning None.
    if not isinstance(url, str) or not url:
        return None

    match = _YOUTUBE_ID_PATTERN.search(url)
    return match.group(1) if match else None
def summary(textt):
    """
    Summarizes the given text with the module-level summarization pipeline.

    Args:
        textt (str): The text to summarize.

    Returns:
        The 'summary_text' entry of the first result returned by the pipeline.
    """
    results = pipe(textt)
    first_result = results[0]
    return first_result['summary_text']
def get_youtube_transcript(link):
    """
    Produces a summary of a YouTube video's transcript.

    Args:
        link (str): The YouTube video URL.

    Returns:
        str: The summarized transcript on success. If no video ID can be
        extracted, or if fetching/formatting/summarizing raises, a
        human-readable message is returned instead of raising.
    """
    video_id = extract_youtube_id(link)
    if video_id:
        try:
            # Fetch the transcript for this video ID.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            # Format the fetched transcript as text before summarizing it.
            plain_text = TextFormatter().format_transcript(segments)
            return summary(plain_text)
        except Exception as e:
            # Any failure is reported back to the caller as text.
            return f"An error has occured: {e}"
    return "Video ID could not be extracted"
# Gradio UI: one text box for the video URL, one text box for the summary.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input youtube url to generate the summary of video", lines=6)],
    outputs=[gr.Textbox(label="Summary of youtube video")],
    title='Generate Video Summary',
    description='This is a project to generate the summary of a video',
)
# `share` is a boolean flag. The string 'True' only worked because any
# non-empty string is truthy ('False' would have enabled sharing as well).
demo.launch(share=True)
|