gianb committed on
Commit
91861aa
·
1 Parent(s): 5420324

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py CHANGED
@@ -1,3 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
  def summarize_pdf(pdf_path):
 
1
+ !pip install transformers pyPDF2 torchaudio
2
+
3
+ !pip install pdfminer.six
4
+
5
+ !pip install datasets sentencepiece
6
+
7
+ from google.colab import drive
8
+ from transformers import pipeline
9
+ import PyPDF2
10
+
11
+ from pdfminer.high_level import extract_pages, extract_text
12
+
13
+ from pdfminer.layout import LTTextContainer, LTChar
14
+
15
+ drive.mount('/content/drive')
16
+
17
+ pdf_path = '/content/drive/MyDrive/Applied AI/Assessment_3/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf'
18
+
19
+ summarization = pipeline ('summarization', model = "pszemraj/long-t5-tglobal-base-16384-book-summary")
20
+
21
+ # Open the PDF file
22
+ pdf_file = open(pdf_path, 'rb')
23
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
24
+
25
+ # Extract the text of the first page (expected to contain the Abstract)
26
+ abstract_text = pdf_reader.pages[0].extract_text()
27
+
28
+ # Close the PDF file
29
+ pdf_file.close()
30
+
31
+ summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
32
+
33
+ print(summary)
34
+
35
+ !pip install --upgrade transformers sentencepiece datasets[audio]
36
+
37
+ import torch
38
+
39
+ import soundfile as sf
40
+
41
+ from IPython.display import Audio
42
+
43
+ from datasets import load_dataset
44
+
45
+ synthesiser = pipeline("text-to-speech", "facebook/mms-tts-eng")
46
+
47
+ TTS_Output = synthesiser(summary)
48
+
49
+ print(TTS_Output.keys())
50
+
51
+
52
+ audio_key = TTS_Output["audio"]
53
+
54
+
55
+ Audio(data=audio_key[0], rate=16000)
56
+
57
+ !pip install gradio==2.3.6
58
+ !pip install --upgrade typing-extensions
59
+
60
  import gradio as gr
61
 
62
  def summarize_pdf(pdf_path):