Spaces:
Runtime error
Runtime error
Update tasks/text.py
Browse files- tasks/text.py +16 -0
tasks/text.py
CHANGED
@@ -12,6 +12,22 @@ import joblib
|
|
12 |
REPO_ID = "kantundpeterpan/frugal-ai-toy"
|
13 |
FILENAME = "tfidf_rf.pkl"
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def lemmatize_X(X):
    """Convert the ``quote`` column of ``X`` into space-joined lemmatized text.

    Every entry of ``X.quote`` is tokenized with ``tokenize_quote``, the
    resulting tokens are passed through ``lemmatize_tokens``, and the
    lemmas are joined back into one string separated by single spaces.

    NOTE(review): ``X`` is presumably a pandas DataFrame with a string
    ``quote`` column -- confirm against the caller.
    """
    tokenized = X.quote.apply(tokenize_quote)
    lemmatized = tokenized.apply(lemmatize_tokens)
    return lemmatized.apply(lambda lemmas: " ".join(lemmas))
|
17 |
|
|
|
12 |
REPO_ID = "kantundpeterpan/frugal-ai-toy"
|
13 |
FILENAME = "tfidf_rf.pkl"
|
14 |
|
15 |
+
import nltk
|
16 |
+
from nltk.tokenize import WordPunctTokenizer
|
17 |
+
from nltk.stem import WordNetLemmatizer
|
18 |
+
from nltk.corpus import stopwords
|
19 |
+
import string
|
20 |
+
|
21 |
+
# Tokens dropped during tokenization: NLTK's English stop-word list plus
# every character in ``string.punctuation``.
stop = set(stopwords.words('english')).union(string.punctuation)
|
22 |
+
|
23 |
+
def tokenize_quote(r):
    """Lower-case ``r``, tokenize it, and drop tokens contained in ``stop``.

    Tokenization is done by ``nltk.word_tokenize``; the surviving tokens are
    returned as a list in their original order.
    """
    lowered = r.lower()
    return [tok for tok in nltk.word_tokenize(lowered) if tok not in stop]
|
27 |
+
|
28 |
+
# Nothing in the visible part of this file binds ``lemmatizer``, so the
# function below would fail with NameError on its first call. Create the
# instance here; WordNetLemmatizer takes no constructor options, so rebinding
# is harmless even if another part of the file also defines it.
lemmatizer = WordNetLemmatizer()


def lemmatize_tokens(tokens: list):
    """Return a new list with every token replaced by its lemma.

    Args:
        tokens: Iterable of token strings, e.g. the output of
            ``tokenize_quote``.

    Returns:
        A list of the same length as ``tokens`` where each element is the
        result of ``lemmatizer.lemmatize`` for the corresponding token.
    """
    return [lemmatizer.lemmatize(t) for t in tokens]
|
30 |
+
|
31 |
def lemmatize_X(X):
    """Convert the ``quote`` column of ``X`` into space-joined lemmatized text.

    Every entry of ``X.quote`` is tokenized with ``tokenize_quote``, the
    resulting tokens are passed through ``lemmatize_tokens``, and the
    lemmas are joined back into one string separated by single spaces.

    NOTE(review): ``X`` is presumably a pandas DataFrame with a string
    ``quote`` column -- confirm against the caller.
    """
    tokenized = X.quote.apply(tokenize_quote)
    lemmatized = tokenized.apply(lemmatize_tokens)
    return lemmatized.apply(lambda lemmas: " ".join(lemmas))
|
33 |
|