Rehman1603 committed on
Commit
b966308
·
1 Parent(s): 22c4326

Delete summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +0 -55
summarize.py DELETED
@@ -1,55 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """summarize.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1xKHOeFek17CY_LDnUe0l0BHaTJrevHxO
8
- """
9
-
10
import sys
import traceback

from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# NOTE(review): the original Colab export kept the notebook shell magics
#   !pip install youtube_transcript_api
#   !pip install transformers
# here. Shell magics are not valid Python in a .py module (the file fails
# to parse), and they ran *after* the imports they were meant to satisfy.
# Install the dependencies before running this script instead:
#   pip install youtube_transcript_api transformers
18
-
19
# User-facing model names mapped to their Hugging Face checkpoints.
_CHECKPOINTS = {
    "Pegasus": "google/pegasus-large",
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
}


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Parameters
    ----------
    link : str
        A YouTube watch URL of the form "...watch?v=<id>"; the video id is
        taken as the text after the first "=".
    model : str
        One of "Pegasus", "mT5" or "BART", selecting the checkpoint used.

    Returns
    -------
    str or None
        The generated summary, or None if transcript fetching or
        summarization fails (the traceback is printed — this preserves the
        original best-effort contract).
    """
    video_id = link.split("=")[1]

    try:
        # Bug fix: the original if/elif chain left `checkpoint` unbound for
        # an unknown model name, crashing later with a confusing NameError.
        # Fail fast with a clear message instead (still caught below, so the
        # function's print-and-return-None contract is unchanged).
        try:
            checkpoint = _CHECKPOINTS[model]
        except KeyError:
            raise ValueError(
                f"Unknown model {model!r}; expected one of {sorted(_CHECKPOINTS)}"
            ) from None

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = " ".join(entry["text"] for entry in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Bug fix: the original rebound the `model` parameter to the loaded
        # network, shadowing the model-name argument; use a distinct local.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        inputs = tokenizer(full_text,
                           max_length=1024,
                           truncation=True,
                           return_tensors="pt")

        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(summary_ids,
                                         skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
        return summary[0]

    except Exception:
        # Best-effort: report and fall through to an implicit None return.
        # (The original also printed sys.exc_info()[2], which is just the
        # traceback object's repr — redundant noise, dropped.)
        print(traceback.format_exc())