Spaces:
Sleeping
Sleeping
Commit
·
ab2f83e
1
Parent(s):
7bdad43
Update app.py
Browse files
app.py
CHANGED
@@ -18,8 +18,6 @@ import functions
|
|
18 |
torch.manual_seed(1)
|
19 |
|
20 |
# Preprocess function
|
21 |
-
import re
|
22 |
-
from contractions import contractions_dict
|
23 |
from nltk.corpus import stopwords
|
24 |
from nltk.tokenize import word_tokenize
|
25 |
|
@@ -32,9 +30,6 @@ def preprocess_text(text):
|
|
32 |
html_pattern = re.compile(r'<[^<>]+>')
|
33 |
text = html_pattern.sub(' ', text)
|
34 |
|
35 |
-
# Expand contractions
|
36 |
-
text = ' '.join([contractions_dict.get(word, word) for word in text.split()])
|
37 |
-
|
38 |
# Remove punctuation (digits and underscores are kept, since \w matches them)
|
39 |
text = re.sub(r'[^\w\s]', ' ', text)
|
40 |
|
|
|
18 |
torch.manual_seed(1)
|
19 |
|
20 |
# Preprocess function
|
|
|
|
|
21 |
from nltk.corpus import stopwords
|
22 |
from nltk.tokenize import word_tokenize
|
23 |
|
|
|
30 |
html_pattern = re.compile(r'<[^<>]+>')
|
31 |
text = html_pattern.sub(' ', text)
|
32 |
|
|
|
|
|
|
|
33 |
# Remove punctuation (digits and underscores are kept, since \w matches them)
|
34 |
text = re.sub(r'[^\w\s]', ' ', text)
|
35 |
|