File size: 2,094 Bytes
8d0feca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ebee50
8d0feca
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from youtube_transcript_api import YouTubeTranscriptApi
import re
import torch
import gradio as gr

# Use a pipeline as a high-level helper
from transformers import pipeline

# Module-level summarization pipeline used by summary() for every chunk.
# It is built once, when this module is imported, from the
# "sshleifer/distilbart-cnn-6-6" checkpoint.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
# Disabled alternative: build the pipeline from a local snapshot directory of
# the same checkpoint instead of the model name, requesting bfloat16 weights.
# model_path = ("../Models/models--sshleifer--distilbart-cnn-6-6/snapshots/d2fde4ca965ba893255479612e4b801aa6500029")

# text_summary = pipeline("summarization", model=model_path,
#                 torch_dtype=torch.bfloat16)


def split_text_to_chunks(text, chunk_size=1024):
    """Cut ``text`` into consecutive slices of at most ``chunk_size`` items.

    Slicing is purely positional: for a string the unit is characters, so a
    chunk boundary may fall in the middle of a word or sentence. Every slice
    except possibly the last has exactly ``chunk_size`` items.

    Args:
        text: The sequence (normally a string) to split.
        chunk_size: Maximum length of each slice.

    Returns:
        A list of slices in their original order; an empty list when ``text``
        is empty.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces

def summary(input):
    """Summarize ``input`` chunk by chunk and return the combined summary.

    The text is cut into 1024-character chunks, each chunk is passed to the
    module-level ``text_summary`` pipeline, and the per-chunk summaries are
    concatenated in their original order with no separator.

    Args:
        input: The text to summarize. The name shadows the builtin ``input``
            but is kept so that existing keyword callers keep working.

    Returns:
        The concatenated ``summary_text`` of every chunk, or an empty string
        when ``input`` is empty.
    """
    # Chunks are already limited to 1024 characters by the splitter, so they
    # are handed to the pipeline as-is without being sliced again.
    chunks = split_text_to_chunks(input, chunk_size=1024)
    return ''.join(text_summary(chunk)[0]['summary_text'] for chunk in chunks)

# A video ID is exactly 11 characters from [0-9A-Za-z_-]. It is accepted when
# it follows "v=" or a path slash. The lookbehind skips the two slashes of
# "://" so the hostname is never mistaken for an ID, and the lookahead rejects
# longer tokens so that only a complete 11-character token is returned.
_VIDEO_ID_RE = re.compile(
    r"(?:v=|(?<![:/])/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def get_video_id(url):
    """Extracts the video ID from a YouTube URL.

    Both the ``v=<id>`` query form and the ``/<id>`` path form are
    recognised; the first occurrence in the string wins.

    Args:
        url: The URL text to inspect.

    Returns:
        The 11-character video ID, or ``None`` when ``url`` is not a string
        or contains no ID.
    """
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None

def _fetch_transcript_lines(video_id):
    """Return the transcript of ``video_id`` as a list of text lines."""
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        # Older youtube-transcript-api releases: static call returning dicts.
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return [entry["text"] for entry in entries]
    # Releases without the static helper: instance API whose result iterates
    # over snippet objects exposing ``.text``.
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return [snippet.text for snippet in fetched]


def get_transcript(video_url):
    """Fetches the transcript of a YouTube video and returns its summary.

    Despite the name, the value returned on success is the summary produced
    by ``summary()``, not the raw transcript.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summarized transcript, or a human-readable error message when the
        URL has no video ID, the transcript cannot be fetched, or the
        summarization fails. Errors are returned as text rather than raised.
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "Invalid YouTube URL!"

    # Fetching and summarizing are guarded separately so that the message
    # names the step that actually failed.
    try:
        lines = _fetch_transcript_lines(video_id)
    except Exception as e:
        return f"Error fetching transcript: {e}"

    transcript_text = "\n".join(lines)
    try:
        return summary(transcript_text)
    except Exception as e:
        return f"Error summarizing transcript: {e}"

# youtube_url = input("Enter YouTube URL: ")
# transcript = get_transcript(youtube_url)
# output = summary(transcript)
# print("\n--- Video Transcript ---\n")
# print(output)

# Close any Gradio interfaces that are still running before creating a new one.
gr.close_all()

# The handler expects a YouTube URL (it extracts the video ID and fetches the
# transcript itself), so the UI text asks for a URL rather than free text.
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="YouTube video URL", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="YouTube Video Summarizer",
    description="Paste a YouTube video URL to get a summary of its transcript.",
)
demo.launch()