added stopwords downloader
Browse files- preprocessing.py +3 -0
preprocessing.py
CHANGED
|
@@ -3,7 +3,10 @@ import string
|
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
import unicodedata
|
|
|
|
| 6 |
|
|
|
|
|
|
|
| 7 |
from nltk.corpus import stopwords
|
| 8 |
# Combined Russian + English stopword set. The second positional argument of
# stopwords.words() is ignore_lines_startswith, not another language, so each
# language is loaded with its own call and the results are unioned.
stop_words = set(stopwords.words('russian')) | set(stopwords.words('english'))
|
| 9 |
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
import unicodedata
|
| 6 |
+
import nltk
|
| 7 |
|
| 8 |
+
# Ensure NLTK stopwords are downloaded
|
| 9 |
+
nltk.download('stopwords')
|
| 10 |
from nltk.corpus import stopwords
|
| 11 |
# Combined Russian + English stopword set. The second positional argument of
# stopwords.words() is ignore_lines_startswith, not another language, so each
# language is loaded with its own call and the results are unioned.
stop_words = set(stopwords.words('russian')) | set(stopwords.words('english'))
|
| 12 |
|