Update app.py
Browse files
app.py
CHANGED
@@ -17,8 +17,7 @@ import nltk
|
|
17 |
import warnings
|
18 |
import streamlit as st
|
19 |
from PIL import Image
|
20 |
-
|
21 |
-
from torch.quantization import quantize_dynamic
|
22 |
|
23 |
nltk.download('punkt')
|
24 |
|
@@ -91,8 +90,8 @@ def preprocess_plain_text(text,window_size=3):
|
|
91 |
text = re.sub(r"https*\S+", " ", text) # url
|
92 |
text = re.sub(r"@\S+", " ", text) # mentions
|
93 |
text = re.sub(r"#\S+", " ", text) # hashtags
|
94 |
-
|
95 |
-
text = re.sub("[^.,!?%$A-Za-z0-9]+", " ", text) # special characters except .,!?%$
|
96 |
|
97 |
#break into lines and remove leading and trailing space on each
|
98 |
lines = [line.strip() for line in text.splitlines()]
|
|
|
17 |
import warnings
|
18 |
import streamlit as st
|
19 |
from PIL import Image
|
20 |
+
|
|
|
21 |
|
22 |
nltk.download('punkt')
|
23 |
|
|
|
90 |
text = re.sub(r"https*\S+", " ", text) # url
|
91 |
text = re.sub(r"@\S+", " ", text) # mentions
|
92 |
text = re.sub(r"#\S+", " ", text) # hashtags
|
93 |
+
text = re.sub(r"\s{2,}", " ", text) # collapse runs of 2+ whitespace characters (including newlines) into one space
|
94 |
+
#text = re.sub("[^.,!?%$A-Za-z0-9]+", " ", text) # special characters except .,!?%$
|
95 |
|
96 |
#break into lines and remove leading and trailing space on each
|
97 |
lines = [line.strip() for line in text.splitlines()]
|