Spaces:

HMPhuoc
/

toxic

Running

App Files Files Community

HMPhuoc committed on Jul 7, 2024

Commit

f5d4e06

1 Parent(s): cf845e8

add tokenize function

Browse files

Files changed (1) hide show

app.py +10 -60

app.py CHANGED Viewed

@@ -16,98 +16,56 @@ from underthesea import word_tokenize
 from phoBERT import BERT_predict
-# Load tokenizer
-# fp = Path(__file__).with_name('tokenizer.pkl')
-# with open(fp,mode="rb") as f:
-#     tokenizer = pickle.load(f)
-#Load LSTM
-#fp = Path(__file__).with_name('lstm_model.h5')
 LSTM_model = tf.keras.models.load_model('lstm_model.tf')
-#Load GRU
-#fp = Path(__file__).with_name('gru_model.h5')
 GRU_model = tf.keras.models.load_model('gru_model.tf')
-def tokenizer_pad(tokenizer,comment_text,max_length=200):
-    comment_text = word_tokenize(comment_text, format="text")
-    comment_text = [comment_text]
-    tokenized_text = tokenizer.texts_to_sequences(comment_text)
-    padded_sequences = pad_sequences(sequences=tokenized_text,maxlen=max_length,padding="post",truncating="post")
-    return padded_sequences
 def LSTM_predict(x):
-    # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
     pred_proba = LSTM_model.predict([x])[0]
     pred_proba = [round(i,2) for i in pred_proba]
-    #print(pred_proba)
     return pred_proba
 def GRU_predict(x):
-    # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
     pred_proba = GRU_model.predict([x])[0]
     pred_proba = [round(i,2) for i in pred_proba]
-    #print(pred_proba)
     return pred_proba
-def plot(result):
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
-  data = pd.DataFrame()
-  data['Nhãn'] = label
-  data['Điểm'] = result
-  #print(data)
-  p = px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1] )
-  return p
-  pass
 def judge(x):
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
-  #bert_pred = BERT_predict(x)
-  #print(result)
-  return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
   for i in range(6):
     result.append((result_lstm[i]+result_gru[i])/2)
   return (result)
 def judgePlus(x):
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
@@ -117,11 +75,10 @@ def judgePlus(x):
     bert_pred = np.average([lstm_pred, gru_pred], axis=0)
-  return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
   result_bert = np.round(bert_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
   if((result_lstm[0]+result_gru[0])<(result_bert[0]*2)):
     for i in range(6):
       result.append((result_bert[i])/1)
@@ -131,26 +88,19 @@ def judgePlus(x):
   return (result)
 def judgeBert(x):
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
   try:
     bert_pred = BERT_predict(x)
   except:
     bert_pred = np.zeros(6, dtype=float)
-  return_result = 'Result'
   result_bert = np.round(bert_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
   for i in range(6):
     result.append((result_bert[i])/1)

 from phoBERT import BERT_predict
 LSTM_model = tf.keras.models.load_model('lstm_model.tf')
 GRU_model = tf.keras.models.load_model('gru_model.tf')
 def LSTM_predict(x):
     """Run ``x`` through ``LSTM_model`` and return the rounded first row.
     ``x`` is wrapped in a one-element list for ``predict``; each value of
     the first prediction row is rounded to two decimal places.
     """
     first_row = LSTM_model.predict([x])[0]
     return [round(score, 2) for score in first_row]
 def GRU_predict(x):
     """Run ``x`` through ``GRU_model`` and return the rounded first row.
     ``x`` is wrapped in a one-element list for ``predict``; each value of
     the first prediction row is rounded to two decimal places.
     """
     first_row = GRU_model.predict([x])[0]
     return [round(score, 2) for score in first_row]
+def tokenize(x):
+  # Shared preprocessing helper: NFKC-normalize x, then pass it through
+  # word_tokenize with format="text" and return the result.
+  # NOTE(review): `ud` is presumably the unicodedata module — confirm against the imports.
+  x = ud.normalize('NFKC', x)
+  x = word_tokenize(x, format="text")
+  return x
 def judge(x):
   # Return a list of six scores: the element-wise mean of the LSTM and GRU
   # predictions for x, computed after x has been passed through tokenize().
   result = []
+  x = tokenize(x)
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
   # Both prediction vectors are rounded to two decimals before averaging.
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
   # Only indices 0-5 are read; presumably one per output label — confirm.
   for i in range(6):
     result.append((result_lstm[i]+result_gru[i])/2)
   return (result)
 def judgePlus(x):
   result = []
+  x = tokenize(x)
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
     bert_pred = np.average([lstm_pred, gru_pred], axis=0)
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
   result_bert = np.round(bert_pred, 2)
   if((result_lstm[0]+result_gru[0])<(result_bert[0]*2)):
     for i in range(6):
       result.append((result_bert[i])/1)
   return (result)
 def judgeBert(x):
   result = []
+  x = tokenize(x)
   try:
     bert_pred = BERT_predict(x)
   except:
     bert_pred = np.zeros(6, dtype=float)
   result_bert = np.round(bert_pred, 2)
   for i in range(6):
     result.append((result_bert[i])/1)