Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ def summarize_text(text):
|
|
20 |
for i, sentence in enumerate(sentences):
|
21 |
if "Abstract" in sentence:
|
22 |
start = i + 1
|
23 |
-
end = start +
|
24 |
break
|
25 |
|
26 |
# Extract abstract
|
@@ -31,10 +31,18 @@ def summarize_text(text):
|
|
31 |
model = AutoModelForSeq2SeqLM.from_pretrained("google/bigbird-pegasus-large-arxiv")
|
32 |
|
33 |
# Tokenize abstract
|
34 |
-
inputs = tokenizer(abstract,
|
|
|
|
|
|
|
35 |
|
36 |
# Generate summary
|
37 |
-
summary_ids = model.generate(inputs['input_ids'],
|
|
|
|
|
|
|
|
|
|
|
38 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
39 |
|
40 |
return summary
|
|
|
20 |
for i, sentence in enumerate(sentences):
|
21 |
if "Abstract" in sentence:
|
22 |
start = i + 1
|
23 |
+
end = start + 6
|
24 |
break
|
25 |
|
26 |
# Extract abstract
|
|
|
31 |
model = AutoModelForSeq2SeqLM.from_pretrained("google/bigbird-pegasus-large-arxiv")
|
32 |
|
33 |
# Tokenize abstract
|
34 |
+
inputs = tokenizer(abstract,
|
35 |
+
max_length=1024,
|
36 |
+
return_tensors="pt",
|
37 |
+
truncation=True)
|
38 |
|
39 |
# Generate summary
|
40 |
+
summary_ids = model.generate(inputs['input_ids'],
|
41 |
+
num_beams=3,
|
42 |
+
max_length=40,
|
43 |
+
min_length=25,
|
44 |
+
do_sample=False,
|
45 |
+
early_stopping=True)
|
46 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
47 |
|
48 |
return summary
|