submission-template

Sleeping

sumesh4C committed on Jan 28

Commit

258e407

verified ·

1 Parent(s): dbda781

Update tasks/utils/predict.py

Files changed (1) hide show

tasks/utils/predict.py CHANGED Viewed

@@ -5,8 +5,10 @@ import pandas as pd
 import sys
 sys.path.append(".")
 from tasks.utils.preprocessing import process_text
-def predict(input_df: pd.DataFrame, tfidf_vectorizer , model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
@@ -19,16 +21,19 @@ def predict(input_df: pd.DataFrame, tfidf_vectorizer , model_path: str):
     Returns:
         pd.Series: Predictions for each row in the input dataframe.
     """
-    """
     # Load the TF-IDF vectorizer
     with open(tfidf_path, "rb") as tfidf_file:
-        tfidf_vectorizer = pickle.load(tfidf_file)
-    """
     # Load the Random Forest model
     with open(model_path, "rb") as model_file:
         model = pickle.load(model_file)
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]
     text_features = tfidf_vectorizer.transform(text_data)

 import sys
 sys.path.append(".")
 from tasks.utils.preprocessing import process_text
+import json
+from sklearn.feature_extraction.text import TfidfVectorizer
+def predict(input_df: pd.DataFrame, tfidf_vectorizer:str , model_path: str):
     """
     Predict the output using a saved TF-IDF vectorizer and Random Forest model.
     Returns:
         pd.Series: Predictions for each row in the input dataframe.
     """
     # Load the TF-IDF vectorizer
     with open(tfidf_path, "rb") as tfidf_file:
+        params = json.load(tfidf_file)
     # Load the Random Forest model
     with open(model_path, "rb") as model_file:
         model = pickle.load(model_file)
+    tfidf_vectorizer = = TfidfVectorizer(**params)
+    tfidf_vectorizer.set_params(preprocessor=process_text)
     # Transform the input text using the TF-IDF vectorizer
     text_data = input_df.to_pandas()["quote"]
     text_features = tfidf_vectorizer.transform(text_data)