File size: 1,944 Bytes
0783ba1
c21e8e7
 
0783ba1
 
c21e8e7
 
 
0783ba1
 
55701f7
c21e8e7
55701f7
4b4ff87
0783ba1
55701f7
c21e8e7
 
 
 
0783ba1
 
c21e8e7
55701f7
0783ba1
c21e8e7
 
 
 
55701f7
0783ba1
c21e8e7
 
 
 
 
 
 
 
 
55701f7
c21e8e7
55701f7
 
c21e8e7
 
 
55701f7
c21e8e7
55701f7
c21e8e7
55701f7
 
 
7d01b5e
55701f7
 
 
 
 
0783ba1
 
55701f7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import torch
import gradio as gr
from transformers import pipeline

# Module-level summarization pipeline, built once at import time and reused by
# summary(). It uses the "sshleifer/distilbart-cnn-12-6" checkpoint and asks
# for bfloat16 via torch_dtype.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)



def summary(input):
    """Summarize a piece of text with the module-level ``text_summary`` pipeline.

    Args:
        input: Text to summarize. The name shadows the ``input`` builtin but
            is kept so that existing keyword callers keep working.

    Returns:
        The ``summary_text`` field of the first result the pipeline returns.
    """
    # Video transcripts are often longer than the model's maximum input
    # length; without truncation the pipeline call can fail on such text.
    # With truncation only the leading part of a very long text is summarized.
    output = text_summary(input, truncation=True)
    return output[0]['summary_text']


# Captures the 11-character video ID from the common YouTube URL shapes:
# watch?v=ID (also with other query parameters before v=), youtu.be/ID,
# /embed/ID, /e/ID, /v/ID, /shorts/ID, /live/ID, and paths with at least two
# segments before the ID (e.g. /user/<name>/ID).
_VIDEO_ID_RE = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*.

    Args:
        url: A YouTube URL (or any text containing one).

    Returns:
        The video ID as a string, or ``None`` when *url* is not a string or
        contains no recognizable YouTube video URL.
    """
    # A non-string (e.g. None from an empty form field) cannot contain a URL;
    # report "not found" instead of letting the regex raise TypeError.
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None


def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. If no video ID can be extracted from
        *video_url*, or if fetching, formatting or summarizing raises, a
        human-readable message string is returned instead.
    """
    video_id = extract_video_id(video_url)
    if video_id:
        try:
            # Download the transcript, flatten it to plain text, summarize.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            plain_text = TextFormatter().format_transcript(segments)
            return summary(plain_text)
        except Exception as e:
            # UI boundary: show the failure to the user rather than crashing.
            return f"An error occurred: {e}"
    return "Video ID could not be extracted."


# Example URL (Replace this with the actual URL when using the script)
# video_url = "https://youtu.be/5PibknhIsTc"
# print(get_youtube_transcript(video_url))

# Close any Gradio interfaces that are already running before creating a new
# one. A single call is enough; nothing is launched between here and
# demo.launch().
gr.close_all()

# Web UI: one text box for the video URL, one text box for the summary.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input YouTube video url to summarize", lines=1)],
    outputs=[gr.Textbox(label="Summarized YouTube video Script", lines=4)],
    title="Project 02: YouTube Video Script Summarizer",
    description=(
        "As understood from the title, if not already, this application will summarize your YouTube video"
        " Script"
    ),
)

# Start the web UI.
demo.launch()