Rehman1603 committed on
Commit
a55174a
·
1 Parent(s): 77f2dd0

Upload summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +51 -0
summarize.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """summarize.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1xKHOeFek17CY_LDnUe0l0BHaTJrevHxO
8
+ """
9
+
10
+ import traceback
11
+ import sys
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14
+
15
def Summarizer(link, model):
    """Summarize a YouTube video's transcript with a pretrained seq2seq model.

    Parameters
    ----------
    link : str
        YouTube watch URL containing a ``v=<video_id>`` query parameter.
    model : str
        Model selector: one of ``"Pegasus"``, ``"mT5"``, or ``"BART"``.

    Returns
    -------
    str or None
        The generated summary, or ``None`` when transcript fetching or
        summarization fails (the traceback is printed — preserves the
        original best-effort contract).
    """
    # Map the user-facing model name to its Hugging Face checkpoint.
    checkpoints = {
        "Pegasus": "google/pegasus-large",
        "mT5": "csebuetnlp/mT5_multilingual_XLSum",
        "BART": "sshleifer/distilbart-cnn-12-6",
    }

    # Take everything after the first '=' and drop any trailing query
    # parameters (e.g. "&t=42s") that the original parse wrongly kept.
    video_id = link.split("=", 1)[1].split("&", 1)[0]

    try:
        # Fail with a clear message instead of the original UnboundLocalError
        # when an unknown model name is passed; still caught below, so the
        # observable outcome (printed error, None return) is unchanged.
        try:
            checkpoint = checkpoints[model]
        except KeyError:
            raise ValueError(
                f"Unknown model {model!r}; expected one of {sorted(checkpoints)}"
            ) from None

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = ' '.join(segment['text'] for segment in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Bind to a new name instead of shadowing the 'model' parameter.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # Truncate to the encoder's maximum input length.
        inputs = tokenizer(full_text,
                           max_length=1024,
                           truncation=True,
                           return_tensors="pt")

        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(summary_ids,
                                         skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
        return summary[0]

    except Exception:
        # Best-effort: report and fall through to an implicit None return.
        # (The original printed the traceback twice — once formatted, once
        # as a raw traceback object via sys.exc_info()[2]; print it once.)
        print(traceback.format_exc())