aakash0563 committed on
Commit
9d18fbb
·
verified ·
1 Parent(s): 043f83f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import torch
4
+ from transformers import pipeline
5
+ import gradio as gr
6
+ import os
7
+ from youtube_transcript_api import YouTubeTranscriptApi
8
+
9
# Module-level summarization pipeline backed by the facebook/bart-large-cnn
# checkpoint; created once when the module is imported and reused by every
# summarization call in this file.
summarizer_bart = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
10
+
11
def _run_summarizer(summarizer, text, max_length, min_length):
    """Call *summarizer* on *text* and return the generated summary string.

    The length bounds are clamped so that ``min_length`` never exceeds
    ``max_length``; generation backends reject or warn about such infeasible
    bounds (e.g. a 10-word input with a fixed minimum of 40).
    """
    max_length = max(1, int(max_length))
    min_length = max(0, min(int(min_length), max_length))
    result = summarizer(
        text, max_length=max_length, min_length=min_length, do_sample=False
    )
    return result[0]["summary_text"]


def _summarize_chunks(words, summarizer, chunk_len, overlap):
    """Summarize *words* window by window and return one summary per window.

    Full windows of ``chunk_len`` words are taken while more text remains
    beyond them, each starting ``chunk_len - overlap`` words after the
    previous one. Whatever is left is summarized as a final tail window.
    *words* must be non-empty.
    """
    summaries = []
    start = 0
    while start + chunk_len < len(words):
        window = " ".join(words[start:start + chunk_len])
        summaries.append(_run_summarizer(summarizer, window, 130, 40))
        start += chunk_len - overlap

    tail = words[start:]
    # The tail may be much shorter than a full window, so its maximum summary
    # length is bounded by its own word count.
    summaries.append(_run_summarizer(summarizer, " ".join(tail), len(tail), 40))
    return summaries


def summarize(full_txt, min_summ_len=30, summarizer=None):
    """Summarize arbitrarily long text with a two-stage chunked strategy.

    Stage 1 splits the text into overlapping word windows and summarizes each
    one. If the joined stage-1 summaries are still at least one window long,
    they are chunk-summarized a second time. The resulting text is then
    condensed into the final summary.

    Args:
        full_txt: Text to summarize. It is split on any whitespace.
        min_summ_len: Requested minimum length of the final summary, passed
            to the summarizer as ``min_length`` (converted with ``int``). It
            is capped at the final ``max_length`` so the bounds stay feasible.
        summarizer: Optional callable with the calling convention used here:
            ``summarizer(text, max_length=..., min_length=..., do_sample=...)``
            returning ``[{"summary_text": str}]``. Defaults to the
            module-level ``summarizer_bart`` pipeline.

    Returns:
        The final summary string, or ``""`` when *full_txt* contains no words.
    """
    if summarizer is None:
        # Resolved lazily so the default tracks the module-level pipeline.
        summarizer = summarizer_bart

    chunk_len = 750
    overlap = 50
    min_final_len = int(min_summ_len)

    words = full_txt.split()
    if not words:
        # Nothing to summarize; avoid feeding an empty string to the model.
        return ""

    chunk_summaries = _summarize_chunks(words, summarizer, chunk_len, overlap)
    print(chunk_summaries)
    combined = " ".join(chunk_summaries)
    combined_words = combined.split()

    final_max_len = 150
    if len(combined_words) >= chunk_len:
        # Stage-1 output is still too long for one pass: reduce it again.
        second_pass = _summarize_chunks(
            combined_words, summarizer, chunk_len, overlap
        )
        print(second_pass)
        combined = " ".join(second_pass)
        final_max_len = 100

    return _run_summarizer(summarizer, combined, final_max_len, min_final_len)
58
+
59
def _extract_video_id(youtube_video_url):
    """Return the video ID contained in a YouTube URL.

    Handles ``watch?v=ID`` links (with any additional query parameters),
    ``youtu.be/ID`` short links, and ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` and ``/v/ID`` paths. A missing scheme is tolerated.

    Raises:
        ValueError: If no video ID can be found in the URL.
    """
    from urllib.parse import parse_qs, urlparse

    url = str(youtube_video_url).strip()
    if "://" not in url:
        # Without a scheme urlparse puts the host into the path component.
        url = "https://" + url
    parsed = urlparse(url)

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise ValueError(
        f"Could not find a YouTube video ID in URL: {youtube_video_url!r}"
    )


def extract_text(youtube_video_url, min_summ_len):
    """Fetch the transcript of a YouTube video and return its summary.

    Args:
        youtube_video_url: URL of the video whose transcript is summarized.
        min_summ_len: Minimum summary length forwarded to ``summarize``.
            ``None`` falls back to ``summarize``'s own default
            (NOTE(review): presumably what the UI sends for an empty number
            box - confirm).

    Returns:
        The summary string produced by ``summarize``.

    Raises:
        ValueError: If the URL does not contain a video ID.
        Exception: Any error raised while fetching the transcript or
            summarizing propagates unchanged.
    """
    video_id = _extract_video_id(youtube_video_url)
    transcript_entries = YouTubeTranscriptApi.get_transcript(video_id)
    # Each transcript entry exposes its caption under the "text" key.
    transcript = " ".join(entry["text"] for entry in transcript_entries)
    if min_summ_len is None:
        return summarize(transcript)
    return summarize(transcript, min_summ_len)
70
+
71
# Gradio UI wired to extract_text: the first input is a text box (the video
# URL) and the second is a number box (the minimum summary length), in the
# same order as extract_text's parameters. The result is shown as text.
demo = gr.Interface(
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
    fn=extract_text,
    inputs=["text", "number"],
    outputs="text",
)

# Start the app with Gradio's debug flag enabled.
demo.launch(debug=True)