AezersX
committed on
Commit
·
f282658
1
Parent(s):
40f1040
Upload 8 files
Browse files- .gitattributes +2 -0
- app.py +40 -0
- flagged/log.csv +15 -0
- jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv +0 -0
- jigsaw-toxic-comment-classification-challenge/test.csv/test.csv +3 -0
- jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv +0 -0
- jigsaw-toxic-comment-classification-challenge/train.csv/train.csv +3 -0
- requirements.txt +4 -0
- toxicity.h5 +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv filter=lfs diff=lfs merge=lfs -text
|
36 |
+
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import tensorflow as tf
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
from tensorflow.keras.layers import TextVectorization
|
7 |
+
import gradio as gr
|
8 |
+
from tensorflow.keras.layers import TextVectorization
|
9 |
+
|
10 |
+
# Trained Keras model used to score comments; loaded once at startup.
modelbaru = tf.keras.models.load_model('toxicity.h5')

# Upper bound on the number of distinct tokens the vectorizer keeps.
MAX_FEATURES = 200000

# Training data: `comment_text` holds the raw comments and every column from
# index 2 onward is read as a label column.
data = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv'))
x = data['comment_text']
y = data[data.columns[2:]].values

# The vectorizer's vocabulary is rebuilt from the training comments on every
# start (presumably so it matches what the saved model was trained with --
# confirm against the training notebook).
vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')
vectorizer.adapt(x.values)

# NOTE: the notebook leftovers that used to follow (bare expression statements,
# vectorizing the whole training set into an unused tensor, and a throwaway
# sample prediction) were removed: nothing read their results and they only
# added startup time and memory.
|
28 |
+
|
29 |
+
def score_comment(comment):
    """Return a text report with one ``label: True/False`` line per label.

    The comment is vectorized as a one-item batch and passed to the loaded
    model; each score in the first prediction row is compared against 0.5.
    """
    batch = vectorizer([comment])
    predictions = modelbaru.predict(batch)

    # NOTE(review): the [2:-1] slice skips the last column of the training
    # frame, so that label is never reported -- confirm this is intentional.
    labels = data.columns[2:-1]
    report_lines = [
        '{}: {}\n'.format(label, predictions[0][position] > 0.5)
        for position, label in enumerate(labels)
    ]
    return ''.join(report_lines)
|
38 |
+
|
39 |
+
# Web UI: a two-line textbox feeds score_comment and the returned report is
# shown as plain text.
# gr.Textbox is used instead of gr.inputs.Textbox because the `gr.inputs`
# namespace is deprecated and absent from current Gradio releases, and
# requirements.txt does not pin a Gradio version.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Toxic Detector by: AezersX'),
    outputs='text',
)
# share=True asks Gradio for a public share link in addition to the local server.
interface.launch(share=True)
|
flagged/log.csv
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'comment','output','flag','username','timestamp'
|
2 |
+
'','toxic: False
|
3 |
+
severe_toxic: False
|
4 |
+
obscene: False
|
5 |
+
threat: False
|
6 |
+
insult: False
|
7 |
+
','','','2022-08-05 22:09:27.277332'
|
8 |
+
'','toxic: False
|
9 |
+
severe_toxic: False
|
10 |
+
obscene: False
|
11 |
+
threat: False
|
12 |
+
insult: False
|
13 |
+
','','','2022-08-05 22:09:29.066786'
|
14 |
+
'test','','','','2022-12-22 19:43:56.796880'
|
15 |
+
'test','','','','2022-12-22 19:43:57.836865'
|
jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
|
3 |
+
size 60354593
|
jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
|
3 |
+
size 68802655
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
tensorflow >= 2.6.0
|
3 |
+
numpy
|
4 |
+
gradio
|
toxicity.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:165dd163b69f9ba71c6a78193a3d2f3a196ff8d0916d4194e8f848f9ae423c7b
|
3 |
+
size 77581464
|