Spaces:
Sleeping
Sleeping
File size: 1,235 Bytes
1ce1659 38fd181 a5e8d12 1ce1659 56cf7e3 1ce1659 38fd181 1ce1659 38fd181 a5e8d12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
from nltk.tokenize import sent_tokenize
def split_into_sentences(input_text):
    """
    Break text into sentences, handling one line of input at a time.

    Every line is trimmed of surrounding whitespace; lines that end up empty
    are ignored, and each remaining line is passed to ``sent_tokenize``.

    Args:
        input_text: The text to segment, expected to be a string.

    Returns:
        A flat list with the sentences of every non-blank line, in order.
        An empty list when ``input_text`` is not a string.
    """
    if not isinstance(input_text, str):
        return []
    # Trimming removes the line terminator as well, so blank lines become "".
    trimmed_lines = (raw_line.strip() for raw_line in input_text.splitlines())
    return [
        sentence
        for line in trimmed_lines
        if line
        for sentence in sent_tokenize(line)
    ]
def split_into_paragraphs(input_text):
    """
    Split input text into paragraphs, one per non-blank line.

    Each line of the input is treated as a paragraph. Surrounding whitespace
    is stripped from every line, and lines that are empty after stripping
    are dropped.

    Args:
        input_text: The input text as a string.

    Returns:
        A list of stripped, non-empty paragraph strings in their original
        order. Returns an empty list if the input is not a string.
    """
    if not isinstance(input_text, str):
        return []
    # strip() also removes the line terminator, so a plain truthiness check
    # is enough to discard blank lines.
    stripped_lines = (line.strip() for line in input_text.splitlines())
    return [paragraph for paragraph in stripped_lines if paragraph]