Spaces: Running on Zero
Commit · eeaa3ab
1 Parent(s): 0badc10
Refactored text chunk splitting logic in translate function
Browse files
app.py
CHANGED
|
@@ -42,20 +42,21 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
|
|
| 42 |
|
| 43 |
def translate(text: str, src_lang: str, tgt_lang: str):
|
| 44 |
# split the input text into smaller chunks
|
| 45 |
-
# split first on newlines
|
| 46 |
outputs = ""
|
| 47 |
paragraph_chunks = text.split("\n")
|
| 48 |
for chunk in paragraph_chunks:
|
| 49 |
# check if the chunk is too long
|
| 50 |
if len(chunk) > 500:
|
| 51 |
-
# split on full stops
|
| 52 |
-
sentence_chunks =
|
| 53 |
for sentence in sentence_chunks:
|
| 54 |
-
|
|
|
|
|
|
|
| 55 |
else:
|
| 56 |
outputs += _translate(chunk, src_lang, tgt_lang) + "\n\n"
|
| 57 |
|
| 58 |
-
return outputs
|
| 59 |
|
| 60 |
|
| 61 |
description = """
|
|
|
|
| 42 |
|
| 43 |
def translate(text: str, src_lang: str, tgt_lang: str):
|
| 44 |
# split the input text into smaller chunks
|
|
|
|
| 45 |
outputs = ""
|
| 46 |
paragraph_chunks = text.split("\n")
|
| 47 |
for chunk in paragraph_chunks:
|
| 48 |
# check if the chunk is too long
|
| 49 |
if len(chunk) > 500:
|
| 50 |
+
# split on full stops, question marks, and exclamation marks
|
| 51 |
+
sentence_chunks = re.split(r"(?<=[.!?])\s+", chunk)
|
| 52 |
for sentence in sentence_chunks:
|
| 53 |
+
if sentence.strip(): # check if the sentence is not empty
|
| 54 |
+
outputs += f"{_translate(sentence, src_lang, tgt_lang)} "
|
| 55 |
+
outputs += "\n\n"
|
| 56 |
else:
|
| 57 |
outputs += _translate(chunk, src_lang, tgt_lang) + "\n\n"
|
| 58 |
|
| 59 |
+
return outputs.strip()
|
| 60 |
|
| 61 |
|
| 62 |
description = """
|