AezersX committed on
Commit
f282658
·
1 Parent(s): 40f1040

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ jigsaw-toxic-comment-classification-challenge/test.csv/test.csv filter=lfs diff=lfs merge=lfs -text
36
+ jigsaw-toxic-comment-classification-challenge/train.csv/train.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization

# Pre-trained Jigsaw toxicity classifier: one sigmoid output per label,
# trained against every label column (data.columns[2:], six labels).
modelbaru = tf.keras.models.load_model('toxicity.h5')

MAX_FEATURES = 200000  # vocabulary size for the text vectorizer

# Training data; columns 0-1 are id/comment_text, columns 2+ are the
# six binary toxicity labels (toxic ... identity_hate).
data = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv'))
x = data['comment_text']

# Re-adapt the vectorizer on the same training comments so token ids
# match the vocabulary the model was trained with.
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
                               output_sequence_length=1800,
                               output_mode='int')
vectorizer.adapt(x.values)

# All six label columns. The original iterated data.columns[2:-1], which
# silently dropped the last label (identity_hate) even though the model
# predicts it — fixed to cover every output.
LABELS = data.columns[2:]


def score_comment(comment):
    """Classify a single comment against every toxicity label.

    Args:
        comment: Raw comment text (str).

    Returns:
        str: One "label: True/False" line per label (threshold 0.5),
        each terminated by a newline.
    """
    vectorized_comment = vectorizer([comment])
    results = modelbaru.predict(vectorized_comment)

    text = ''
    for idx, col in enumerate(LABELS):
        text += '{}: {}\n'.format(col, results[0][idx] > 0.5)

    return text


# gr.inputs.Textbox was removed in gradio 3.x; gr.Textbox is the
# current (and backward-compatible) component.
interface = gr.Interface(fn=score_comment,
                         inputs=gr.Textbox(lines=2, placeholder='Toxic Detector by: AezersX'),
                         outputs='text')
interface.launch(share=True)
flagged/log.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'comment','output','flag','username','timestamp'
2
+ '','toxic: False
3
+ severe_toxic: False
4
+ obscene: False
5
+ threat: False
6
+ insult: False
7
+ ','','','2022-08-05 22:09:27.277332'
8
+ '','toxic: False
9
+ severe_toxic: False
10
+ obscene: False
11
+ threat: False
12
+ insult: False
13
+ ','','','2022-08-05 22:09:29.066786'
14
+ 'test','','','','2022-12-22 19:43:56.796880'
15
+ 'test','','','','2022-12-22 19:43:57.836865'
jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv ADDED
The diff for this file is too large to render. See raw diff
 
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
3
+ size 60354593
jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv ADDED
The diff for this file is too large to render. See raw diff
 
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
3
+ size 68802655
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ tensorflow >= 2.6.0
3
+ numpy
4
+ gradio
toxicity.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165dd163b69f9ba71c6a78193a3d2f3a196ff8d0916d4194e8f848f9ae423c7b
3
+ size 77581464