Rehman1603 committed on
Commit
95239bb
·
1 Parent(s): 56f0c5c

Update summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +11 -19
summarize.py CHANGED
@@ -1,49 +1,41 @@
1
- # -*- coding: utf-8 -*-
2
- """summarize.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1xKHOeFek17CY_LDnUe0l0BHaTJrevHxO
8
- """
9
-
10
  import traceback
11
  import sys
 
12
  from youtube_transcript_api import YouTubeTranscriptApi
13
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14
 
15
  def Summarizer(link, model):
16
-
17
  video_id = link.split("=")[1]
18
 
19
  try:
20
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
21
  FinalTranscript = ' '.join([i['text'] for i in transcript])
22
-
23
  if model == "Pegasus":
24
  checkpoint = "google/pegasus-large"
25
  elif model == "mT5":
26
  checkpoint = "csebuetnlp/mT5_multilingual_XLSum"
27
  elif model == "BART":
28
  checkpoint = "sshleifer/distilbart-cnn-12-6"
29
-
30
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
31
  model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
32
 
33
 
34
- inputs = tokenizer(FinalTranscript,
35
- max_length=1024,
36
  truncation=True,
37
  return_tensors="pt")
38
-
39
  summary_ids = model.generate(inputs["input_ids"])
40
- summary = tokenizer.batch_decode(summary_ids,
41
- skip_special_tokens=True,
42
  clean_up_tokenization_spaces=False)
43
-
44
 
45
  return summary[0]
46
-
47
 
48
  except Exception:
49
  print(traceback.format_exc())
 
 
 
 
 
 
 
 
 
 
1
  import traceback
2
  import sys
3
+
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
 
7
  def Summarizer(link, model):
8
+
9
  video_id = link.split("=")[1]
10
 
11
  try:
12
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
13
  FinalTranscript = ' '.join([i['text'] for i in transcript])
14
+
15
  if model == "Pegasus":
16
  checkpoint = "google/pegasus-large"
17
  elif model == "mT5":
18
  checkpoint = "csebuetnlp/mT5_multilingual_XLSum"
19
  elif model == "BART":
20
  checkpoint = "sshleifer/distilbart-cnn-12-6"
21
+
22
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
23
  model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
24
 
25
 
26
+ inputs = tokenizer(FinalTranscript,
27
+ max_length=1024,
28
  truncation=True,
29
  return_tensors="pt")
30
+
31
  summary_ids = model.generate(inputs["input_ids"])
32
+ summary = tokenizer.batch_decode(summary_ids,
33
+ skip_special_tokens=True,
34
  clean_up_tokenization_spaces=False)
35
+
36
 
37
  return summary[0]
38
+
39
 
40
  except Exception:
41
  print(traceback.format_exc())