Update app.py
app.py CHANGED

@@ -6,7 +6,7 @@ import numpy
 import scipy
 from gtts import gTTS
 from io import BytesIO
-from transformers import
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 def extract_text(pdf_file):
     pdfReader = PyPDF2.PdfReader(pdf_file)
@@ -27,14 +27,14 @@ def summarize_text(text):
     abstract = ". ".join(sentences[start:end+1])
 
     # Load BART model & tokenizer
-    tokenizer =
-    model =
+    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
 
     # Tokenize abstract
     inputs = tokenizer(abstract, return_tensors="pt", truncation=True)
 
     # Generate summary
-    summary_ids = model.generate(inputs['input_ids'], num_beams=
+    summary_ids = model.generate(inputs['input_ids'], num_beams=5, max_length=45, min_length=30, early_stopping=True)
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
     return summary
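For context, here is a minimal standalone sketch of the summarization step that this commit introduces, assembled from the changed hunks above. The helper name summarize_abstract is hypothetical, and the sentence-selection logic that builds the abstract is not visible in this diff, so the function takes the abstract text directly.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def summarize_abstract(abstract: str) -> str:
    # Load BART model & tokenizer (facebook/bart-large-cnn is downloaded on first run)
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

    # Tokenize the abstract, truncating to the model's maximum input length
    inputs = tokenizer(abstract, return_tensors="pt", truncation=True)

    # Generate the summary with beam search, using the parameters from the diff
    summary_ids = model.generate(
        inputs["input_ids"],
        num_beams=5,
        max_length=45,
        min_length=30,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
```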
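The unchanged context lines also import gTTS and BytesIO, which suggests the app converts the summary to speech in memory. That part is not touched by this commit; the sketch below only illustrates the usual gTTS-to-BytesIO pattern, and the lang="en" parameter is an assumption.

```python
from io import BytesIO
from gtts import gTTS

def speak_summary(summary: str) -> BytesIO:
    # Synthesize speech for the summary without writing to disk;
    # lang="en" is assumed, not taken from this commit.
    audio_buffer = BytesIO()
    gTTS(text=summary, lang="en").write_to_fp(audio_buffer)
    audio_buffer.seek(0)  # rewind so the caller can read from the start
    return audio_buffer
```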