File size: 3,508 Bytes
9d18fbb
 
 
 
 
 
 
 
6844efe
 
9d18fbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a2af6c
 
 
 
 
 
 
 
 
 
 
 
2a57676
9ef5b4b
6a2af6c
 
 
 
9ef5b4b
6a2af6c
9d18fbb
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from urllib.parse import parse_qs, urlparse

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

# First-pass model: meeting-summary fine-tune, applied chunk-by-chunk to the raw transcript.
summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

# Second-pass model: general-purpose BART-large-CNN, used to compress the
# concatenated chunk summaries into the final summary.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize(full_txt, min_summ_len=30):
    """Two-stage map-reduce summarization of a long transcript.

    The text is split on spaces into overlapping ~750-word chunks; each chunk
    is summarized with the meeting-summary model, the partial summaries are
    concatenated, and the concatenation is compressed with BART (chunked once
    more first if it is still longer than a single chunk).

    Args:
        full_txt: Raw transcript text to summarize.
        min_summ_len: Minimum length of the final summary (passed to BART).

    Returns:
        The final summary string.
    """
    chunk_len = 750   # words per chunk fed to a model
    overlap = 50      # words shared between consecutive chunks for context

    def _summarize_chunks(word_list, model):
        """Summarize word_list chunk-by-chunk with `model`; return partial summaries."""
        summaries = []
        pointer = 0
        while pointer < len(word_list):
            if pointer + chunk_len < len(word_list):
                txt = " ".join(word_list[pointer:pointer + chunk_len])
                summaries.append(
                    model(txt, max_length=130, min_length=40,
                          do_sample=False)[0]['summary_text']
                )
                pointer += chunk_len - overlap
            else:
                # Final (short) chunk. The original code passed
                # max_length=len(word_list) - pointer unconditionally, which
                # errors when the remainder is shorter than min_length=40;
                # clamp both so max_length > min_length always holds.
                txt = " ".join(word_list[pointer:])
                remaining = len(word_list) - pointer
                min_len = min(40, remaining)
                summaries.append(
                    model(txt, max_length=max(remaining, min_len + 1),
                          min_length=min_len, do_sample=False)[0]['summary_text']
                )
                break
        return summaries

    words = full_txt.split(" ")
    intermediate = " ".join(_summarize_chunks(words, summarizer_ft))
    intermediate_words = intermediate.split(" ")

    if len(intermediate_words) < chunk_len:
        # Intermediate summary fits in one chunk: single final BART pass.
        summ = summarizer_bart(intermediate, max_length=150,
                               min_length=int(min_summ_len),
                               do_sample=False)[0]['summary_text']
    else:
        # Still too long: chunk-summarize again with BART, then compress once more.
        reduced = " ".join(_summarize_chunks(intermediate_words, summarizer_bart))
        summ = summarizer_bart(reduced, max_length=100,
                               min_length=int(min_summ_len),
                               do_sample=False)[0]['summary_text']
    return summ

def extract_text(youtube_video_url, min_summ_len):
    """Fetch a YouTube transcript and return its summary.

    Args:
        youtube_video_url: Watch-page URL, e.g. https://www.youtube.com/watch?v=ID.
        min_summ_len: Minimum length of the final summary (forwarded to summarize).

    Returns:
        The summary string produced by summarize().

    Raises:
        Whatever youtube_transcript_api raises when no transcript is available.
    """
    # Parse the video id from the query string rather than split("=")[1],
    # which breaks on URLs carrying extra parameters such as &t=42s.
    query = parse_qs(urlparse(youtube_video_url).query)
    video_id = query["v"][0] if "v" in query else youtube_video_url.split("=")[-1]

    # Prefer Hindi captions, then English — same preference as before.
    transcript_segments = YouTubeTranscriptApi.get_transcript(
        video_id, languages=['hi', 'en'])
    transcript = " ".join(segment["text"] for segment in transcript_segments)
    return summarize(transcript, min_summ_len)

# Gradio UI: a URL textbox and a number box feed extract_text; the returned
# summary is rendered as plain text.
demo = gr.Interface(
    fn=extract_text,
    inputs=["text","number"],  # video URL first, then minimum summary length
    outputs="text",
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
)

# debug=True surfaces tracebacks in the Gradio UI while developing.
demo.launch(debug=True)