submission-template

Sleeping

sumesh4C committed on Jan 29

Commit

601d216

verified ·

1 Parent(s): c9878b6

Update tasks/utils/predict.py

Files changed (1) hide show

tasks/utils/predict.py CHANGED Viewed

@@ -8,7 +8,7 @@ from tasks.utils.preprocessing import process_text
 import json
 from sklearn.feature_extraction.text import TfidfVectorizer
-def predict(input_df: pd.DataFrame, tfidf_path:str , model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
@@ -31,8 +31,13 @@ def predict(input_df: pd.DataFrame, tfidf_path:str , model_path: str):
     with open(model_path, "rb") as model_file:
         model = pickle.load(model_file)
     tfidf_vectorizer = TfidfVectorizer(**params)
     tfidf_vectorizer.set_params(preprocessor=process_text)
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]

 import json
 from sklearn.feature_extraction.text import TfidfVectorizer
+def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
     with open(model_path, "rb") as model_file:
         model = pickle.load(model_file)
+    # Load vocabulary
+    with open(tfidf_voc_path, "rb") as f:
+        vocab = pickle.load(f)
     tfidf_vectorizer = TfidfVectorizer(**params)
     tfidf_vectorizer.set_params(preprocessor=process_text)
+    tfidf_vectorizer.set_params(vocabulary=vocab)
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]