Rehman1603 committed on
Commit
d5c4567
·
1 Parent(s): 587659a

Create summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +42 -0
summarize.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ import sys
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
+
6
# Maps the user-facing model name to the Hugging Face checkpoint to load.
_CHECKPOINTS = {
    "Pegasus": "google/pegasus-large",
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
}

# URL path prefixes that are immediately followed by the video ID.
_ID_PATH_PREFIXES = ("embed", "shorts", "live", "v")


def _extract_video_id(link):
    """Return the YouTube video ID contained in *link*.

    Handles ``watch?v=ID`` URLs (with the ``v`` parameter in any position),
    ``youtu.be/ID`` short links and ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` and ``/v/ID`` paths.  As a last resort it falls back to the
    text between the first and second ``=`` so that any input accepted by
    the previous ``link.split("=")[1]`` logic keeps working.

    Raises:
        ValueError: if no video ID can be found in *link*.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import parse_qs, urlparse

    link = link.strip()
    # urlparse only fills in the host when a scheme is present.
    parsed = urlparse(link if "://" in link else "https://" + link)

    # parse_qs drops blank values, so a hit here is always non-empty.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    if parsed.netloc.lower() in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
        return segments[1]

    # Legacy behaviour: whatever sits between the first and second "=".
    pieces = link.split("=")
    if len(pieces) > 1 and pieces[1]:
        return pieces[1]

    raise ValueError("Could not find a YouTube video ID in {!r}".format(link))


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the YouTube video (watch, short, embed, ... forms).
        model: Name of the summarization model to use; one of
            ``"Pegasus"``, ``"mT5"`` or ``"BART"``.

    Returns:
        The generated summary as a string, or ``None`` if anything went
        wrong (bad link, unknown model name, transcript or model failure).
        In the failure case the traceback is printed to stdout.
    """
    try:
        video_id = _extract_video_id(link)

        # Validate the model name before doing any network work, so a typo
        # fails fast with a clear message instead of an UnboundLocalError
        # after the transcript has been downloaded.
        if model not in _CHECKPOINTS:
            raise ValueError(
                "Unknown model {!r}; expected one of {}".format(
                    model, ", ".join(sorted(_CHECKPOINTS))
                )
            )
        checkpoint = _CHECKPOINTS[model]

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join(entry["text"] for entry in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Separate name: do not clobber the ``model`` argument.
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # The transcript is truncated to the first 1024 tokens.
        inputs = tokenizer(
            transcript_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )

        summary_ids = seq2seq_model.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return summary[0]

    except Exception:
        # Best-effort API: report the failure and signal it with None.
        print(traceback.format_exc())
        return None