varl42 committed on
Commit
23fa73c
·
1 Parent(s): 3e30cd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -20,7 +20,7 @@ def summarize_text(text):
20
  for i, sentence in enumerate(sentences):
21
  if "Abstract" in sentence:
22
  start = i + 1
23
- end = start + 7
24
  break
25
 
26
  # Extract abstract
@@ -31,10 +31,18 @@ def summarize_text(text):
31
  model = AutoModelForSeq2SeqLM.from_pretrained("google/bigbird-pegasus-large-arxiv")
32
 
33
  # Tokenize abstract
34
- inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)
 
 
 
35
 
36
  # Generate summary
37
- summary_ids = model.generate(inputs['input_ids'], num_beams=2, max_length=42, min_length=22, do_sample=True, early_stopping=True)
 
 
 
 
 
38
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
39
 
40
  return summary
 
20
  for i, sentence in enumerate(sentences):
21
  if "Abstract" in sentence:
22
  start = i + 1
23
+ end = start + 6
24
  break
25
 
26
  # Extract abstract
 
31
  model = AutoModelForSeq2SeqLM.from_pretrained("google/bigbird-pegasus-large-arxiv")
32
 
33
  # Tokenize abstract
34
+ inputs = tokenizer(abstract,
35
+ max_length=1024,
36
+ return_tensors="pt",
37
+ truncation=True)
38
 
39
  # Generate summary
40
+ summary_ids = model.generate(inputs['input_ids'],
41
+ num_beams=3,
42
+ max_length=40,
43
+ min_length=25,
44
+ do_sample=False,
45
+ early_stopping=True)
46
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
47
 
48
  return summary