sumesh4C committed on
Commit
cae897c
·
verified ·
1 Parent(s): ced4359

Update tasks/utils/predict.py

Browse files
Files changed (1) hide show
  1. tasks/utils/predict.py +6 -3
tasks/utils/predict.py CHANGED
@@ -8,7 +8,7 @@ from tasks.utils.preprocessing import process_text
8
  import json
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
 
11
- def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_path: str):
12
  """
13
  Predict the output using a saved TF-IDF vectorizer and Random Forest model.
14
 
@@ -35,11 +35,14 @@ def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, model_p
35
  with open(tfidf_voc_path, "rb") as f:
36
  vocab = pickle.load(f)
37
 
 
 
 
 
38
  tfidf_vectorizer = TfidfVectorizer(**params)
39
  tfidf_vectorizer.set_params(preprocessor=process_text)
40
  tfidf_vectorizer.set_params(vocabulary=vocab)
41
-
42
- print(tfidf_vectorizer.vocabulary_)
43
 
44
  # Transform the input text using the TF-IDF vectorizer
45
  text_data = input_df.to_pandas()["quote"]
 
8
  import json
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
 
11
+ def predict(input_df: pd.DataFrame, tfidf_path:str , tfidf_voc_path:str, tfidf_idf_path:str, model_path: str):
12
  """
13
  Predict the output using a saved TF-IDF vectorizer and Random Forest model.
14
 
 
35
  with open(tfidf_voc_path, "rb") as f:
36
  vocab = pickle.load(f)
37
 
38
+ # Load the fitted IDF weights
39
+ with open(tfidf_idf_path, "rb") as f:
40
+ idf = pickle.load(f)
41
+
42
  tfidf_vectorizer = TfidfVectorizer(**params)
43
  tfidf_vectorizer.set_params(preprocessor=process_text)
44
  tfidf_vectorizer.set_params(vocabulary=vocab)
45
+ tfidf_vectorizer.idf_ = idf
 
46
 
47
  # Transform the input text using the TF-IDF vectorizer
48
  text_data = input_df.to_pandas()["quote"]