com3dian committed on
Commit
b8609ad
·
verified ·
1 Parent(s): d8303d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -9,6 +9,10 @@ from weasyprint import HTML, CSS
9
  import io
10
  from io import BytesIO
11
  from grobidmonkey import reader
 
 
 
 
12
 
13
  from transformers import pipeline
14
  from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
@@ -104,14 +108,15 @@ if (uploaded_file is not None) and (not 'generation_done' in st.session_state):
104
  if (summ_text is not None) or ('summ_text' in st.session_state):
105
 
106
  # Function to render HTML content
 
107
  def format(title_list, text_list):
108
  format_list = []
109
  for index, text in enumerate(text_list):
110
  title = "## " + title_list[index] + "\n"
111
- # Split text by periods
112
- sentences = text.split('.')
113
  # Create HTML list items
114
- list_items = "".join([f"- {sentence.strip()}.\n" for sentence in sentences if sentence.strip()])
115
  format_list.append(title + list_items)
116
  return format_list
117
 
@@ -304,8 +309,8 @@ if (summ_text is not None) or ('summ_text' in st.session_state):
304
  mime="application/pdf"
305
  )
306
  st.markdown("""
307
- -----------------------------------------
308
- Great! Thank you for using this huggingface space.\n
309
  If you want to know more about this application, you can take a look at the [paper](https://studenttheses.uu.nl/handle/20.500.12932/45939).\n
310
  To contact the author you can send an email to [email protected];\n
311
  To cite the paper you can use Bibtex\n
 
9
  import io
10
  from io import BytesIO
11
  from grobidmonkey import reader
12
+ import nltk
13
+ nltk.download('punkt')
14
+ nltk.download('punkt_tab')
15
+ from nltk.tokenize import sent_tokenize
16
 
17
  from transformers import pipeline
18
  from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
 
108
  if (summ_text is not None) or ('summ_text' in st.session_state):
109
 
110
  # Function to render HTML content
111
+
112
  def format(title_list, text_list):
113
  format_list = []
114
  for index, text in enumerate(text_list):
115
  title = "## " + title_list[index] + "\n"
116
+ # Split text into sentences using nltk's sent_tokenize
117
+ sentences = sent_tokenize(text)
118
  # Create HTML list items
119
+ list_items = "".join([f"- {sentence.strip()}\n" for sentence in sentences if sentence.strip()])
120
  format_list.append(title + list_items)
121
  return format_list
122
 
 
309
  mime="application/pdf"
310
  )
311
  st.markdown("""
312
+ -----------------------------------------
313
+ Great! Thank you for using this huggingface space.\n
314
  If you want to know more about this application, you can take a look at the [paper](https://studenttheses.uu.nl/handle/20.500.12932/45939).\n
315
  To contact the author you can send an email to [email protected];\n
316
  To cite the paper you can use Bibtex\n