Spaces:
Sleeping
Sleeping
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtube_transcript_api.formatters import TextFormatter | |
from transformers import pipeline | |
import re | |
import gradio as gr | |
# Summarization pipeline, created once when the module is loaded and used by
# summary() for every request.
pipe = pipeline(
    task="summarization",
    model="Falconsai/text_summarization",
)
# Matches the 11-character video ID in the supported YouTube URL shapes:
# watch?v=<id>, /v/<id>, /e/<id>, /embed/<id>, nested paths ending in /<id>,
# /shorts/<id>, /live/<id> and youtu.be/<id>.
# The shorts/live alternative is listed last so that every URL shape that was
# already recognised keeps resolving through its original alternative.
_YOUTUBE_ID_PATTERN = re.compile(
    r"(?:https?://)?(?:www\.)?"
    r"(?:youtube\.com/"
    r"(?:[^/\n\s]+/\S+/|(?:v|e(?:mbed)?)/|\S*?[?&]v=|(?:shorts|live)/)"
    r"|youtu\.be/)"
    r"([a-zA-Z0-9_-]{11})"
)


def extract_youtube_id(url):
    """
    Extracts the YouTube video ID from a given URL.

    The URL is searched rather than fully matched, so surrounding text or
    whitespace (for example from a multi-line textbox) is tolerated.

    Args:
        url (str): The YouTube video URL.

    Returns:
        str: The extracted 11-character video ID, or None if no match is
        found or `url` is not a string.
    """
    # re.search raises TypeError on non-string input; honour the documented
    # "None if no match" contract instead of crashing the caller.
    if not isinstance(url, str):
        return None

    match = _YOUTUBE_ID_PATTERN.search(url)
    return match.group(1) if match else None
def summary(textt):
    """Run the module-level summarization pipeline on `textt`.

    Args:
        textt (str): The text to summarize.

    Returns:
        The 'summary_text' entry of the first result returned by `pipe`.
    """
    first_result = pipe(textt)[0]
    return first_result['summary_text']
def get_youtube_transcript(link):
    """Fetch the transcript of a YouTube video and return its summary.

    This is the callback wired into the Gradio interface, so it always
    returns a string: either the summary or a human-readable error message.

    Args:
        link (str): The YouTube video URL.

    Returns:
        str: The summarized transcript, "Video ID could not be extracted"
        when no video ID can be found in `link`, or an
        "An error has occurred: ..." message when fetching, formatting or
        summarizing the transcript fails.
    """
    # A missing, non-string or blank value cannot contain a video ID; report
    # it the same way as an unrecognised URL instead of letting the ID
    # extraction raise outside the try block below.
    if not isinstance(link, str) or not link.strip():
        return "Video ID could not be extracted"

    video_id = extract_youtube_id(link)
    if not video_id:
        return "Video ID could not be extracted"

    try:
        # Fetch the transcript entries for the video.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Flatten the transcript entries into plain text for the summarizer.
        text_transcript = TextFormatter().format_transcript(transcript)
        return summary(text_transcript)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure
        # should be shown to the user rather than crash the callback.
        return f"An error has occurred: {e}"
# Gradio UI: one textbox for the video URL in, one textbox with the summary out.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input youtube url to generate the summary of video", lines=6)],
    outputs=[gr.Textbox(label="Summary of youtube video")],
    title='Generate Video Summary',
    description='This is a project to generate the summary of a video',
)

# `share` is a boolean flag; pass a real bool rather than the string 'True',
# which only behaves as "on" because any non-empty string is truthy.
demo.launch(share=True)