Spaces:
Sleeping
Sleeping
Update tasks/utils/predict.py
Browse files- tasks/utils/predict.py +6 -3
tasks/utils/predict.py
CHANGED
@@ -8,7 +8,7 @@ from tasks.utils.preprocessing import process_text
|
|
8 |
import json
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
|
11 |
-
def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_path: str):
|
12 |
"""
|
13 |
Predict the output using a saved TF-IDF vectorizer and Random Forest model.
|
14 |
|
@@ -35,11 +35,14 @@ def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_p
|
|
35 |
with open(tfidf_voc_path, "rb") as f:
|
36 |
vocab = pickle.load(f)
|
37 |
|
|
|
|
|
|
|
|
|
38 |
tfidf_vectorizer = TfidfVectorizer(**params)
|
39 |
tfidf_vectorizer.set_params(preprocessor=process_text)
|
40 |
tfidf_vectorizer.set_params(vocabulary=vocab)
|
41 |
-
|
42 |
-
print(tfidf_vectorizer.vocabulary_)
|
43 |
|
44 |
# Transform the input text using the TF-IDF vectorizer
|
45 |
text_data = input_df.to_pandas()["quote"]
|
|
|
8 |
import json
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
|
11 |
+
def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, tfidf_idf_path:str, model_path: str):
|
12 |
"""
|
13 |
Predict the output using a saved TF-IDF vectorizer and Random Forest model.
|
14 |
|
|
|
35 |
with open(tfidf_voc_path, "rb") as f:
|
36 |
vocab = pickle.load(f)
|
37 |
|
38 |
+
# Load the fitted IDF weights
|
39 |
+
with open(tfidf_idf_path, "rb") as f:
|
40 |
+
idf = pickle.load(f)
|
41 |
+
|
42 |
tfidf_vectorizer = TfidfVectorizer(**params)
|
43 |
tfidf_vectorizer.set_params(preprocessor=process_text)
|
44 |
tfidf_vectorizer.set_params(vocabulary=vocab)
|
45 |
+
tfidf_vectorizer.idf_ = idf
|
|
|
46 |
|
47 |
# Transform the input text using the TF-IDF vectorizer
|
48 |
text_data = input_df.to_pandas()["quote"]
|