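Update utils.py: add the missing closing `])` to the `". ".join([...])` list comprehensions in normalize() and text_preprocess()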
Browse files
utils.py
CHANGED
|
@@ -57,7 +57,7 @@ def normalize(text, segment=True):
|
|
| 57 |
text = replace_all(text, dict_map)
|
| 58 |
if segment:
|
| 59 |
text = text.split(".")
|
| 60 |
-
text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text
|
| 61 |
return text
|
| 62 |
def text_preprocess(document):
|
| 63 |
punc = [i for i in ["\"", "-", ".", ":"]]#string.punctuation.replace(",","")]
|
|
@@ -84,7 +84,7 @@ def text_preprocess(document):
|
|
| 84 |
document = re.sub(" ", " ", document)
|
| 85 |
try:
|
| 86 |
document = document.split(".")
|
| 87 |
-
document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document
|
| 88 |
except:
|
| 89 |
pass
|
| 90 |
return document.lower()
|
|
|
|
| 57 |
text = replace_all(text, dict_map)
|
| 58 |
if segment:
|
| 59 |
text = text.split(".")
|
| 60 |
+
text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text])
|
| 61 |
return text
|
| 62 |
def text_preprocess(document):
|
| 63 |
punc = [i for i in ["\"", "-", ".", ":"]]#string.punctuation.replace(",","")]
|
|
|
|
| 84 |
document = re.sub(" ", " ", document)
|
| 85 |
try:
|
| 86 |
document = document.split(".")
|
| 87 |
+
document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document])
|
| 88 |
except:
|
| 89 |
pass
|
| 90 |
return document.lower()
|