defyingentropy committed on
Commit
dc05613
·
1 Parent(s): 482f778

use pickles

Browse files
Files changed (1) hide show
  1. app.py +3 -10
app.py CHANGED
@@ -11,6 +11,7 @@ from nltk.stem import WordNetLemmatizer
11
  from nltk.corpus import wordnet
12
  import nltk
13
  import os
 
14
 
15
 
16
  def lowercasing(lda_samples):
@@ -91,16 +92,8 @@ nltk.download('averaged_perceptron_tagger')
91
  nltk.download('wordnet')
92
  nltk.download('omw-1.4')
93
 
94
- preprocessing_samples = list(filter(lambda x: isinstance(x, str), df['reviewText']))
95
- preprocessing_samples = lowercasing(preprocessing_samples)
96
- preprocessing_samples = punctuation_removal(preprocessing_samples)
97
- preprocessing_samples = lemmatize(preprocessing_samples)
98
-
99
- tf_vectorizer = CountVectorizer(
100
- max_df=0.01, min_df=2, max_features=n_features, stop_words="english"
101
- )
102
- tf = tf_vectorizer.fit_transform(preprocessing_samples)
103
-
104
 
105
  product_id = 'B009MA34NY'
106
  lda_samples = list(filter(lambda x: isinstance(x, str), df[df['asin'] == product_id]['reviewText']))
 
11
  from nltk.corpus import wordnet
12
  import nltk
13
  import os
14
+ import pickle
15
 
16
 
17
  def lowercasing(lda_samples):
 
92
  nltk.download('wordnet')
93
  nltk.download('omw-1.4')
94
 
95
+ with open("vectorizer.pkl", "rb") as f:
96
+ tf_vectorizer = pickle.load(f)
 
 
 
 
 
 
 
 
97
 
98
  product_id = 'B009MA34NY'
99
  lda_samples = list(filter(lambda x: isinstance(x, str), df[df['asin'] == product_id]['reviewText']))