Commit
·
6f00151
1
Parent(s):
3d1c35c
Remove words shorter than 4 characters
Browse files- preprocessor.py +3 -3
preprocessor.py
CHANGED
@@ -39,10 +39,10 @@ def preprocess(data):
|
|
39 |
|
40 |
def clean_message(message):
|
41 |
# Remove messages containing '<Media ...>'
|
42 |
-
if '<
|
43 |
return ''
|
44 |
-
# Remove words with less than
|
45 |
-
words = [word for word in message.split() if len(word) >=
|
46 |
# Remove stopwords
|
47 |
words = [word for word in words if word.lower() not in combined_stop_words]
|
48 |
return ' '.join(words)
|
|
|
39 |
|
40 |
def clean_message(message):
    """Return ``message`` with short words and stop words removed.

    A message that contains the '<Médias omis>' placeholder is discarded
    entirely and an empty string is returned. Otherwise the message is
    split on whitespace, and a word is kept only when it has at least
    4 characters and its lowercase form is not in ``combined_stop_words``
    (a name taken from the surrounding scope). The kept words are joined
    back together with single spaces.
    """
    # The media placeholder marks the whole message as not useful.
    if '<Médias omis>' in message:
        return ''

    kept = []
    for token in message.split():
        # Drop words with fewer than 4 characters.
        if len(token) < 4:
            continue
        # Drop stop words; the comparison is made on the lowercase form.
        if token.lower() in combined_stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)
|