|
import os |
|
import pandas as pd |
|
import tensorflow as tf |
|
import numpy as np |
|
|
|
from tensorflow.keras.layers import TextVectorization |
|
import gradio as gr |
|
from tensorflow.keras.layers import TextVectorization |
|
|
|
# Load the saved Keras toxicity classifier from 'toxicity.h5'
# (expected to sit next to this script; trained on the Jigsaw labels below).
modelbaru = tf.keras.models.load_model('toxicity.h5')
|
|
|
# Vocabulary cap for the TextVectorization layer.
MAX_FEATURES = 200000

# NOTE(review): the path repeats 'train.csv' — presumably the Kaggle archive
# extracts to a 'train.csv' directory that contains train.csv; confirm on disk.
data = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv'))

# Raw comment strings, and the label matrix: every column from index 2 onward
# is one toxicity label (this same slice drives the report in score_comment).
x = data['comment_text']
y = data[data.columns[2:]].values

# Integer-encode comments: each text becomes a fixed-length sequence of 1800
# token ids drawn from a vocabulary of at most MAX_FEATURES tokens.
vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')
vectorizer.adapt(x.values)

# Removed notebook leftovers: bare REPL expressions and an unused
# vectorize/predict demo (`vectorized_text`, `input_str`, `res`) that had no
# effect on the running app.
|
|
|
def score_comment(comment):
    """Classify one comment and return a 'label: bool' line per toxicity label.

    Args:
        comment: raw comment text (str).

    Returns:
        Newline-separated report, one line per label, with True/False at a
        0.5 probability threshold (trailing newline included).
    """
    # Wrap in a list so the model receives a batch of one sequence.
    vectorized_comment = vectorizer([comment])
    results = modelbaru.predict(vectorized_comment)

    # FIX: the model is trained on data.columns[2:] (see the y label matrix),
    # but this loop previously used columns[2:-1], silently dropping the last
    # label from the report. Iterate the full label slice.
    lines = ['{}: {}'.format(col, results[0][idx] > 0.5)
             for idx, col in enumerate(data.columns[2:])]
    return '\n'.join(lines) + '\n'
|
|
|
# Gradio UI: free-text input -> per-label toxicity verdicts from score_comment.
# FIX: gr.inputs.Textbox is the deprecated pre-3.x namespace (removed in
# Gradio 3+/4); gr.Textbox takes the same arguments.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Toxic Detector by: AezersX'),
    outputs='text',
)

# share=True additionally publishes a temporary public URL.
interface.launch(share=True)