submission-template

Sleeping

sumesh4C committed on Jan 29

Commit

cae897c

verified ·

1 Parent(s): ced4359

Update tasks/utils/predict.py

Files changed (1) hide show

tasks/utils/predict.py CHANGED Viewed

@@ -8,7 +8,7 @@ from tasks.utils.preprocessing import process_text
 import json
 from sklearn.feature_extraction.text import TfidfVectorizer
-def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
@@ -35,11 +35,14 @@ def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_p
     with open(tfidf_voc_path, "rb") as f:
         vocab = pickle.load(f)
     tfidf_vectorizer = TfidfVectorizer(**params)
     tfidf_vectorizer.set_params(preprocessor=process_text)
     tfidf_vectorizer.set_params(vocabulary=vocab)
-    print(tfidf_vectorizer.vocabulary_)
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]

 import json
 from sklearn.feature_extraction.text import TfidfVectorizer
+def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, tfidf_idf_path:str, model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
     with open(tfidf_voc_path, "rb") as f:
         vocab = pickle.load(f)
+    # Load IDF weights
+    with open(tfidf_idf_path, "rb") as f:
+        idf = pickle.load(f)
     tfidf_vectorizer = TfidfVectorizer(**params)
     tfidf_vectorizer.set_params(preprocessor=process_text)
     tfidf_vectorizer.set_params(vocabulary=vocab)
+    tfidf_vectorizer.idf_ = idf
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]