mrsk1883 committed on
Commit
5cecb0e
·
1 Parent(s): f8b4423

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -31
app.py CHANGED
@@ -1,27 +1,16 @@
1
  import gradio as gr
2
- from PyPDF2 import PdfReader
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
- from gtts import gTTS
5
- from io import BytesIO
6
 
7
- # IPython check
8
- try:
9
- from IPython.display import Audio
10
- ipython_available = True
11
- except ImportError:
12
- ipython_available = False
13
-
14
- # Model
15
  model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
16
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
18
 
19
- def summarize_pdf_abstract(pdf_bytes):
20
 
21
  try:
22
- reader = PdfReader(pdf_bytes)
23
-
24
- abstract_text = ""
25
  for page in reader.pages:
26
  if "Abstract" in page.extract_text() or "Introduction" in page.extract_text():
27
  abstract_text = page.extract_text()
@@ -29,22 +18,17 @@ def summarize_pdf_abstract(pdf_bytes):
29
 
30
  inputs = tokenizer(abstract_text, return_tensors="pt")
31
  outputs = model.generate(**inputs)
32
- summary = tokenizer.decode(outputs[0])
33
-
34
- if ipython_available:
35
- speech = gTTS(text=summary, lang="en")
36
- speech_bytes = speech.get_wav_data()
37
- else:
38
- speech_bytes = None
39
 
40
- return {"summary": summary, "audio": speech_bytes}
41
 
42
  except Exception as e:
43
  raise Exception(str(e))
44
-
45
- if ipython_available:
46
- interface = gr.Interface(...)
47
- else:
48
- interface = gr.Interface(...)
49
-
50
- interface.launch()
 
 
1
  import gradio as gr
2
+ from PyPDF2 import PdfReader
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
4
 
 
 
 
 
 
 
 
 
5
  model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
6
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
 
9
+ def summarize_pdf_abstract(pdf_file):
10
 
11
  try:
12
+ reader = PdfReader(pdf_file)
13
+ abstract_text = ""
 
14
  for page in reader.pages:
15
  if "Abstract" in page.extract_text() or "Introduction" in page.extract_text():
16
  abstract_text = page.extract_text()
 
18
 
19
  inputs = tokenizer(abstract_text, return_tensors="pt")
20
  outputs = model.generate(**inputs)
21
+ summary = tokenizer.decode(outputs[0])
 
 
 
 
 
 
22
 
23
+ return {"summary": summary}
24
 
25
  except Exception as e:
26
  raise Exception(str(e))
27
+
28
+ interface = gr.Interface(
29
+ fn=summarize_pdf_abstract,
30
+ inputs=gr.inputs.File(label="Upload PDF"),
31
+ outputs=gr.outputs.Textbox(label="Summary")
32
+ )
33
+
34
+ interface.launch(share=True)