Spaces:

ml6team
/

doc-to-slides

Sleeping

com3dian committed on Oct 23, 2024

Commit

b8609ad

verified ·

1 Parent(s): d8303d2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,6 +9,10 @@ from weasyprint import HTML, CSS
 import io
 from io import BytesIO
 from grobidmonkey import reader
 from transformers import pipeline
 from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
@@ -104,14 +108,15 @@ if (uploaded_file is not None) and (not 'generation_done' in st.session_state):
 if (summ_text is not None) or ('summ_text' in st.session_state):
     # Function to render HTML content
     def format(title_list, text_list):
         # Build one Markdown section per entry of text_list: a "## " heading
         # taken from the same position in title_list, followed by one "- "
         # bullet per sentence. Returns the sections as a list of strings.
         # title_list must have at least as many entries as text_list,
         # otherwise title_list[index] raises IndexError.
         # NOTE(review): the name `format` shadows the Python builtin inside
         # the enclosing scope.
         format_list = []
         for index, text in enumerate(text_list):
             title = "## " + title_list[index] + "\n"
-            # Split text by periods
-            sentences = text.split('.')
             # Create Markdown list items (the output is "- " bullets, not HTML):
             # every '.' in the text ends a bullet, fragments that are empty
             # after stripping are dropped, and a "." is re-appended to each one.
-            list_items = "".join([f"- {sentence.strip()}.\n" for sentence in sentences if sentence.strip()])
             format_list.append(title + list_items)
         return format_list
@@ -304,8 +309,8 @@ if (summ_text is not None) or ('summ_text' in st.session_state):
         mime="application/pdf"
     )
     st.markdown("""
-    -----------------------------------------
-    Great! Thank you for using this huggingface space.\n
 If you want to know more about this application, you can take a look at the [paper](https://studenttheses.uu.nl/handle/20.500.12932/45939).\n
 To contact the author you can send an email to [email protected];\n
 To cite the paper you can use Bibtex\n

 import io
 from io import BytesIO
 from grobidmonkey import reader
+import nltk
+nltk.download('punkt')
+nltk.download('punkt_tab')
+from nltk.tokenize import sent_tokenize
 from transformers import pipeline
 from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
 if (summ_text is not None) or ('summ_text' in st.session_state):
     # Function to render HTML content
     def format(title_list, text_list):
         # Build one Markdown section per entry of text_list: a "## " heading
         # taken from the same position in title_list, followed by one "- "
         # bullet per sentence. Returns the sections as a list of strings.
         # title_list must have at least as many entries as text_list,
         # otherwise title_list[index] raises IndexError.
         # NOTE(review): the name `format` shadows the Python builtin inside
         # the enclosing scope.
         format_list = []
         for index, text in enumerate(text_list):
             title = "## " + title_list[index] + "\n"
+            # Split text into sentences using nltk's sent_tokenize
+            sentences = sent_tokenize(text)
             # Create Markdown list items (the output is "- " bullets, not HTML):
             # each sentence is stripped and used as returned by sent_tokenize,
             # with no "." appended; sentences that are empty after stripping
             # are dropped.
+            list_items = "".join([f"- {sentence.strip()}\n" for sentence in sentences if sentence.strip()])
             format_list.append(title + list_items)
         return format_list
         mime="application/pdf"
     )
     st.markdown("""
+-----------------------------------------
+Great! Thank you for using this huggingface space.\n
 If you want to know more about this application, you can take a look at the [paper](https://studenttheses.uu.nl/handle/20.500.12932/45939).\n
 To contact the author you can send an email to [email protected];\n
 To cite the paper you can use Bibtex\n