Rehman1603 commited on
Commit
22c4326
·
1 Parent(s): 23c0407

Upload summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +55 -0
summarize.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""summarize.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1xKHOeFek17CY_LDnUe0l0BHaTJrevHxO
"""

import traceback
import sys

from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# NOTE: the original notebook used IPython magics here (`!pip install ...`),
# which are a SyntaxError in a plain .py module — and they ran *after* the
# imports that need those packages. Install the dependencies beforehand:
#   pip install youtube_transcript_api transformers
19
def Summarizer(link, model):
    """Fetch a YouTube video's transcript and summarize it with a seq2seq model.

    Parameters
    ----------
    link : str
        YouTube watch URL; the video id is taken as the text after the first
        "=" (e.g. "https://www.youtube.com/watch?v=XYZ" -> "XYZ").
    model : str
        One of "Pegasus", "mT5" or "BART", mapped to a Hugging Face checkpoint.

    Returns
    -------
    str or None
        The generated summary, or None if any step fails (the traceback is
        printed, matching the original best-effort behavior).
    """
    # UI-facing model name -> Hugging Face checkpoint.
    checkpoints = {
        "Pegasus": "google/pegasus-large",
        "mT5": "csebuetnlp/mT5_multilingual_XLSum",
        "BART": "sshleifer/distilbart-cnn-12-6",
    }

    try:
        # Fix: this was outside the try, so a link without "=" raised an
        # uncaught IndexError instead of being reported like other failures.
        video_id = link.split("=")[1]

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        final_transcript = ' '.join(segment['text'] for segment in transcript)

        # Fix: the original if/elif chain left `checkpoint` unbound for an
        # unknown model name (NameError); fail with an explicit error instead.
        try:
            checkpoint = checkpoints[model]
        except KeyError:
            raise ValueError(
                f"Unknown model {model!r}; expected one of {sorted(checkpoints)}"
            )

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Fix: don't shadow the `model` parameter with the loaded model object.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        inputs = tokenizer(final_transcript,
                           max_length=1024,
                           truncation=True,
                           return_tensors="pt")

        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(summary_ids,
                                         skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
        return summary[0]

    except Exception:
        # Best-effort error reporting, as in the original. Dropped the extra
        # `print(sys.exc_info()[2])` — it only printed the repr of a traceback
        # object, duplicating (badly) what format_exc() already shows.
        print(traceback.format_exc())