Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +48 -0
- .env.template +9 -0
- .gitattributes +27 -35
- .gitignore +83 -0
- Dockerfile +29 -0
- analysis/analysis.txt +264 -0
- analysis/analyze_lang_distribution.py +336 -0
- analysis/compute_class_weights.py +499 -0
- analysis/plot_loss_curves.py +374 -0
- analysis/plot_roc_curves.py +163 -0
- app.py +262 -0
- augmentation/balance_english.py +237 -0
- augmentation/threat_augment.py +379 -0
- augmentation/toxic_augment.py +439 -0
- datacard.md +39 -0
- docker-compose.yml +13 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_identity_hate.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_insult.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_obscene.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_severe_toxic.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_threat.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_0.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_1.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_2.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_3.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_4.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_5.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_6.png +0 -0
- evaluation_results/eval_20250208_161149/eval_params.json +7 -0
- evaluation_results/eval_20250208_161149/evaluation_results.json +2020 -0
- evaluation_results/eval_20250208_161149/plots/calibration_0.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_1.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_2.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_3.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_4.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_5.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_6.png +3 -0
- evaluation_results/eval_20250208_161149/plots/class_calibration.png +3 -0
- evaluation_results/eval_20250208_161149/plots/language_performance.png +0 -0
- evaluation_results/eval_20250208_161149/plots/metric_correlations.png +0 -0
- evaluation_results/eval_20250208_161149/plots/overall_calibration.png +0 -0
- evaluation_results/eval_20250208_161149/plots/performance_distributions.png +0 -0
- evaluation_results/eval_20250208_161149/predictions.npz +3 -0
- evaluation_results/eval_20250208_161149/thresholds.json +58 -0
- evaluation_results/eval_20250401_143401/eval_params.json +21 -0
- evaluation_results/eval_20250401_143401/evaluation_results.json +684 -0
- evaluation_results/eval_20250401_143401/plots/per_class_comparison.png +0 -0
- evaluation_results/eval_20250401_143401/plots/roc_all_classes.png +3 -0
- evaluation_results/eval_20250401_143401/plots/roc_by_language.png +3 -0
.dockerignore
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Git
|
2 |
+
.git
|
3 |
+
.gitignore
|
4 |
+
|
5 |
+
# Python
|
6 |
+
__pycache__/
|
7 |
+
*.py[cod]
|
8 |
+
*$py.class
|
9 |
+
*.so
|
10 |
+
.Python
|
11 |
+
env/
|
12 |
+
build/
|
13 |
+
develop-eggs/
|
14 |
+
dist/
|
15 |
+
downloads/
|
16 |
+
eggs/
|
17 |
+
.eggs/
|
18 |
+
lib/
|
19 |
+
lib64/
|
20 |
+
parts/
|
21 |
+
sdist/
|
22 |
+
var/
|
23 |
+
wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
|
28 |
+
# Virtual Environment
|
29 |
+
venv/
|
30 |
+
ENV/
|
31 |
+
|
32 |
+
# IDE
|
33 |
+
.idea/
|
34 |
+
.vscode/
|
35 |
+
*.swp
|
36 |
+
*.swo
|
37 |
+
|
38 |
+
# Project specific
|
39 |
+
dataset/
|
40 |
+
weights/
|
41 |
+
wandb/
|
42 |
+
*.pt
|
43 |
+
*.pth
|
44 |
+
*.ckpt
|
45 |
+
|
46 |
+
# Logs
|
47 |
+
*.log
|
48 |
+
logs/
|
.env.template
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Weights & Biases API Key
|
2 |
+
WANDB_API_KEY=
|
3 |
+
|
4 |
+
# Model Configuration
|
5 |
+
BATCH_SIZE=16
|
6 |
+
GRAD_ACCUM_STEPS=4
|
7 |
+
EPOCHS=5
|
8 |
+
LEARNING_RATE=2e-5
|
9 |
+
MODEL_NAME=xlm-roberta-large
|
.gitattributes
CHANGED
@@ -1,35 +1,27 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
dataset/raw/MULTILINGUAL_TOXIC_DATASET_360K_7LANG.csv filter=lfs diff=lfs merge=lfs -text
|
2 |
+
dataset/raw/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_binary.csv filter=lfs diff=lfs merge=lfs -text
|
3 |
+
dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv filter=lfs diff=lfs merge=lfs -text
|
4 |
+
dataset/split/train.csv filter=lfs diff=lfs merge=lfs -text
|
5 |
+
dataset/processed/MULTILINGUAL_TOXIC_DATASET_AUGMENTED.csv filter=lfs diff=lfs merge=lfs -text
|
6 |
+
evaluation_results/eval_20250208_161149/plots/calibration_0.png filter=lfs diff=lfs merge=lfs -text
|
7 |
+
evaluation_results/eval_20250208_161149/plots/calibration_1.png filter=lfs diff=lfs merge=lfs -text
|
8 |
+
evaluation_results/eval_20250208_161149/plots/calibration_2.png filter=lfs diff=lfs merge=lfs -text
|
9 |
+
evaluation_results/eval_20250208_161149/plots/calibration_3.png filter=lfs diff=lfs merge=lfs -text
|
10 |
+
evaluation_results/eval_20250208_161149/plots/calibration_4.png filter=lfs diff=lfs merge=lfs -text
|
11 |
+
evaluation_results/eval_20250208_161149/plots/calibration_5.png filter=lfs diff=lfs merge=lfs -text
|
12 |
+
evaluation_results/eval_20250208_161149/plots/calibration_6.png filter=lfs diff=lfs merge=lfs -text
|
13 |
+
evaluation_results/eval_20250208_161149/plots/class_calibration.png filter=lfs diff=lfs merge=lfs -text
|
14 |
+
evaluation_results/eval_20250208_161149/predictions.npz filter=lfs diff=lfs merge=lfs -text
|
15 |
+
evaluation_results/eval_20250401_143401/plots/roc_all_classes.png filter=lfs diff=lfs merge=lfs -text
|
16 |
+
evaluation_results/eval_20250401_143401/plots/roc_by_language.png filter=lfs diff=lfs merge=lfs -text
|
17 |
+
evaluation_results/eval_20250401_143401/plots/roc_identity_hate.png filter=lfs diff=lfs merge=lfs -text
|
18 |
+
evaluation_results/eval_20250401_143401/plots/roc_insult.png filter=lfs diff=lfs merge=lfs -text
|
19 |
+
evaluation_results/eval_20250401_143401/plots/roc_obscene.png filter=lfs diff=lfs merge=lfs -text
|
20 |
+
evaluation_results/eval_20250401_143401/plots/roc_severe_toxic.png filter=lfs diff=lfs merge=lfs -text
|
21 |
+
evaluation_results/eval_20250401_143401/plots/roc_threat.png filter=lfs diff=lfs merge=lfs -text
|
22 |
+
evaluation_results/eval_20250401_143401/plots/roc_toxic.png filter=lfs diff=lfs merge=lfs -text
|
23 |
+
evaluation_results/eval_20250401_143401/predictions.npz filter=lfs diff=lfs merge=lfs -text
|
24 |
+
images/class_distribution.png filter=lfs diff=lfs merge=lfs -text
|
25 |
+
images/language_distribution.png filter=lfs diff=lfs merge=lfs -text
|
26 |
+
images/toxicity_by_language.png filter=lfs diff=lfs merge=lfs -text
|
27 |
+
images/toxicity_correlation.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python cache files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
|
5 |
+
# Virtual environment
|
6 |
+
venv/
|
7 |
+
ENV/
|
8 |
+
env/
|
9 |
+
env.bak/
|
10 |
+
venv.bak/
|
11 |
+
|
12 |
+
# Gradio
|
13 |
+
.gradio/*
|
14 |
+
|
15 |
+
# Weights and Biases
|
16 |
+
weights/*
|
17 |
+
dataset/*
|
18 |
+
cache/*
|
19 |
+
wandb/*
|
20 |
+
|
21 |
+
# IDE and editor files
|
22 |
+
.idea/
|
23 |
+
.vscode/
|
24 |
+
*.swp
|
25 |
+
*.swo
|
26 |
+
|
27 |
+
# Jupyter Notebook checkpoints
|
28 |
+
.ipynb_checkpoints/
|
29 |
+
|
30 |
+
# Distribution / packaging
|
31 |
+
.Python
|
32 |
+
build/
|
33 |
+
develop-eggs/
|
34 |
+
dist/
|
35 |
+
downloads/
|
36 |
+
eggs/
|
37 |
+
lib/
|
38 |
+
lib64/
|
39 |
+
parts/
|
40 |
+
sdist/
|
41 |
+
var/
|
42 |
+
wheels/
|
43 |
+
share/python-wheels/
|
44 |
+
*.egg-info/
|
45 |
+
.installed.cfg
|
46 |
+
*.egg
|
47 |
+
|
48 |
+
# Pytest
|
49 |
+
.cache/
|
50 |
+
nosetests.xml
|
51 |
+
coverage.xml
|
52 |
+
*.cover
|
53 |
+
*.py,cover
|
54 |
+
.hypothesis/
|
55 |
+
.pytest_cache/
|
56 |
+
|
57 |
+
# mypy
|
58 |
+
.mypy_cache/
|
59 |
+
.dmypy.json
|
60 |
+
dmypy.json
|
61 |
+
|
62 |
+
# pyenv
|
63 |
+
.python-version
|
64 |
+
|
65 |
+
# pipenv
|
66 |
+
Pipfile.lock
|
67 |
+
|
68 |
+
# pyre type checker
|
69 |
+
.pyre/
|
70 |
+
|
71 |
+
# C extensions
|
72 |
+
*.so
|
73 |
+
|
74 |
+
# Backup files
|
75 |
+
*~
|
76 |
+
*.bak
|
77 |
+
*.tmp
|
78 |
+
|
79 |
+
#Logging
|
80 |
+
*.log
|
81 |
+
logs/
|
82 |
+
|
83 |
+
*.csv
|
Dockerfile
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use CUDA-enabled PyTorch base image
|
2 |
+
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
|
3 |
+
|
4 |
+
# Set working directory
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Install system dependencies
|
8 |
+
RUN apt-get update && apt-get install -y \
|
9 |
+
git \
|
10 |
+
&& rm -rf /var/lib/apt/lists/*
|
11 |
+
|
12 |
+
# Copy requirements file
|
13 |
+
COPY requirements.txt .
|
14 |
+
|
15 |
+
# Install Python dependencies
|
16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
17 |
+
|
18 |
+
# Copy project files
|
19 |
+
COPY . .
|
20 |
+
|
21 |
+
# Create directories for data and models
|
22 |
+
RUN mkdir -p dataset/final_balanced weights
|
23 |
+
|
24 |
+
# Set environment variables
|
25 |
+
ENV PYTHONPATH=/app
|
26 |
+
ENV WANDB_API_KEY=""
|
27 |
+
|
28 |
+
# Default command to run training
|
29 |
+
CMD ["python", "model/train.py"]
|
analysis/analysis.txt
ADDED
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
(venv) PS V:\Deeptanshu Lal\PROJECTS\Toxic Comment Classification> python .\analysis\analyze_lang_distribution.py
|
2 |
+
Reading dataset...
|
3 |
+
|
4 |
+
Dataset Overview:
|
5 |
+
--------------------------------------------------
|
6 |
+
Total number of comments: 361,228
|
7 |
+
Number of languages: 7
|
8 |
+
|
9 |
+
Language Distribution:
|
10 |
+
--------------------------------------------------
|
11 |
+
ru: 52,632 comments (14.57%)
|
12 |
+
tr: 52,558 comments (14.55%)
|
13 |
+
pt: 52,440 comments (14.52%)
|
14 |
+
es: 52,412 comments (14.51%)
|
15 |
+
fr: 52,368 comments (14.50%)
|
16 |
+
it: 52,340 comments (14.49%)
|
17 |
+
en: 46,478 comments (12.87%)
|
18 |
+
|
19 |
+
Class Distribution by Language:
|
20 |
+
--------------------------------------------------
|
21 |
+
|
22 |
+
RU (Total: 52,632 comments)
|
23 |
+
0 toxic classes: 26,316 (50.00%)
|
24 |
+
1 toxic classes: 7,688 (14.61%)
|
25 |
+
2 toxic classes: 8,010 (15.22%)
|
26 |
+
3 toxic classes: 7,103 (13.50%)
|
27 |
+
4 toxic classes: 2,740 (5.21%)
|
28 |
+
5 toxic classes: 706 (1.34%)
|
29 |
+
6 toxic classes: 69 (0.13%)
|
30 |
+
|
31 |
+
TR (Total: 52,558 comments)
|
32 |
+
0 toxic classes: 26,279 (50.00%)
|
33 |
+
1 toxic classes: 7,677 (14.61%)
|
34 |
+
2 toxic classes: 8,004 (15.23%)
|
35 |
+
3 toxic classes: 7,088 (13.49%)
|
36 |
+
4 toxic classes: 2,736 (5.21%)
|
37 |
+
5 toxic classes: 705 (1.34%)
|
38 |
+
6 toxic classes: 69 (0.13%)
|
39 |
+
|
40 |
+
PT (Total: 52,440 comments)
|
41 |
+
0 toxic classes: 26,220 (50.00%)
|
42 |
+
1 toxic classes: 7,668 (14.62%)
|
43 |
+
2 toxic classes: 7,977 (15.21%)
|
44 |
+
3 toxic classes: 7,071 (13.48%)
|
45 |
+
4 toxic classes: 2,732 (5.21%)
|
46 |
+
5 toxic classes: 703 (1.34%)
|
47 |
+
6 toxic classes: 69 (0.13%)
|
48 |
+
|
49 |
+
ES (Total: 52,412 comments)
|
50 |
+
0 toxic classes: 26,206 (50.00%)
|
51 |
+
1 toxic classes: 7,647 (14.59%)
|
52 |
+
2 toxic classes: 7,982 (15.23%)
|
53 |
+
3 toxic classes: 7,069 (13.49%)
|
54 |
+
4 toxic classes: 2,737 (5.22%)
|
55 |
+
5 toxic classes: 702 (1.34%)
|
56 |
+
6 toxic classes: 69 (0.13%)
|
57 |
+
|
58 |
+
FR (Total: 52,368 comments)
|
59 |
+
0 toxic classes: 26,184 (50.00%)
|
60 |
+
1 toxic classes: 7,626 (14.56%)
|
61 |
+
2 toxic classes: 7,990 (15.26%)
|
62 |
+
3 toxic classes: 7,066 (13.49%)
|
63 |
+
4 toxic classes: 2,728 (5.21%)
|
64 |
+
5 toxic classes: 705 (1.35%)
|
65 |
+
6 toxic classes: 69 (0.13%)
|
66 |
+
|
67 |
+
IT (Total: 52,340 comments)
|
68 |
+
0 toxic classes: 26,170 (50.00%)
|
69 |
+
1 toxic classes: 7,652 (14.62%)
|
70 |
+
2 toxic classes: 7,967 (15.22%)
|
71 |
+
3 toxic classes: 7,057 (13.48%)
|
72 |
+
4 toxic classes: 2,722 (5.20%)
|
73 |
+
5 toxic classes: 703 (1.34%)
|
74 |
+
6 toxic classes: 69 (0.13%)
|
75 |
+
|
76 |
+
EN (Total: 46,478 comments)
|
77 |
+
0 toxic classes: 22,989 (49.46%)
|
78 |
+
1 toxic classes: 8,499 (18.29%)
|
79 |
+
2 toxic classes: 5,604 (12.06%)
|
80 |
+
3 toxic classes: 6,391 (13.75%)
|
81 |
+
4 toxic classes: 2,395 (5.15%)
|
82 |
+
5 toxic classes: 553 (1.19%)
|
83 |
+
6 toxic classes: 47 (0.10%)
|
84 |
+
|
85 |
+
Detailed Toxicity Analysis by Language:
|
86 |
+
--------------------------------------------------
|
87 |
+
|
88 |
+
RU (Total: 52,632 comments)
|
89 |
+
- Toxic:
|
90 |
+
Count: 25,954 (49.31%)
|
91 |
+
95% CI: [48.89%, 49.74%]
|
92 |
+
- Severe Toxic:
|
93 |
+
Count: 2,441 (4.64%)
|
94 |
+
95% CI: [4.46%, 4.82%]
|
95 |
+
- Obscene:
|
96 |
+
Count: 12,432 (23.62%)
|
97 |
+
95% CI: [23.26%, 23.98%]
|
98 |
+
- Threat:
|
99 |
+
Count: 1,075 (2.04%)
|
100 |
+
95% CI: [1.92%, 2.16%]
|
101 |
+
- Insult:
|
102 |
+
Count: 15,207 (28.89%)
|
103 |
+
95% CI: [28.51%, 29.28%]
|
104 |
+
- Identity Hate:
|
105 |
+
Count: 2,812 (5.34%)
|
106 |
+
95% CI: [5.15%, 5.53%]
|
107 |
+
|
108 |
+
TR (Total: 52,558 comments)
|
109 |
+
- Toxic:
|
110 |
+
Count: 25,908 (49.29%)
|
111 |
+
95% CI: [48.87%, 49.72%]
|
112 |
+
- Severe Toxic:
|
113 |
+
Count: 2,439 (4.64%)
|
114 |
+
95% CI: [4.46%, 4.82%]
|
115 |
+
- Obscene:
|
116 |
+
Count: 12,411 (23.61%)
|
117 |
+
95% CI: [23.25%, 23.98%]
|
118 |
+
- Threat:
|
119 |
+
Count: 1,077 (2.05%)
|
120 |
+
95% CI: [1.93%, 2.17%]
|
121 |
+
- Insult:
|
122 |
+
Count: 15,170 (28.86%)
|
123 |
+
95% CI: [28.48%, 29.25%]
|
124 |
+
- Identity Hate:
|
125 |
+
Count: 2,827 (5.38%)
|
126 |
+
95% CI: [5.19%, 5.57%]
|
127 |
+
|
128 |
+
PT (Total: 52,440 comments)
|
129 |
+
- Toxic:
|
130 |
+
Count: 25,841 (49.28%)
|
131 |
+
95% CI: [48.85%, 49.71%]
|
132 |
+
- Severe Toxic:
|
133 |
+
Count: 2,432 (4.64%)
|
134 |
+
95% CI: [4.46%, 4.82%]
|
135 |
+
- Obscene:
|
136 |
+
Count: 12,395 (23.64%)
|
137 |
+
95% CI: [23.27%, 24.00%]
|
138 |
+
- Threat:
|
139 |
+
Count: 1,080 (2.06%)
|
140 |
+
95% CI: [1.94%, 2.18%]
|
141 |
+
- Insult:
|
142 |
+
Count: 15,143 (28.88%)
|
143 |
+
95% CI: [28.49%, 29.26%]
|
144 |
+
- Identity Hate:
|
145 |
+
Count: 2,801 (5.34%)
|
146 |
+
95% CI: [5.15%, 5.53%]
|
147 |
+
|
148 |
+
ES (Total: 52,412 comments)
|
149 |
+
- Toxic:
|
150 |
+
Count: 25,874 (49.37%)
|
151 |
+
95% CI: [48.94%, 49.79%]
|
152 |
+
- Severe Toxic:
|
153 |
+
Count: 2,432 (4.64%)
|
154 |
+
95% CI: [4.46%, 4.82%]
|
155 |
+
- Obscene:
|
156 |
+
Count: 12,388 (23.64%)
|
157 |
+
95% CI: [23.27%, 24.00%]
|
158 |
+
- Threat:
|
159 |
+
Count: 1,073 (2.05%)
|
160 |
+
95% CI: [1.93%, 2.17%]
|
161 |
+
- Insult:
|
162 |
+
Count: 15,140 (28.89%)
|
163 |
+
95% CI: [28.50%, 29.27%]
|
164 |
+
- Identity Hate:
|
165 |
+
Count: 2,783 (5.31%)
|
166 |
+
95% CI: [5.12%, 5.50%]
|
167 |
+
|
168 |
+
FR (Total: 52,368 comments)
|
169 |
+
- Toxic:
|
170 |
+
Count: 25,877 (49.41%)
|
171 |
+
95% CI: [48.99%, 49.84%]
|
172 |
+
- Severe Toxic:
|
173 |
+
Count: 2,428 (4.64%)
|
174 |
+
95% CI: [4.46%, 4.82%]
|
175 |
+
- Obscene:
|
176 |
+
Count: 12,379 (23.64%)
|
177 |
+
95% CI: [23.27%, 24.00%]
|
178 |
+
- Threat:
|
179 |
+
Count: 1,066 (2.04%)
|
180 |
+
95% CI: [1.91%, 2.16%]
|
181 |
+
- Insult:
|
182 |
+
Count: 15,131 (28.89%)
|
183 |
+
95% CI: [28.51%, 29.28%]
|
184 |
+
- Identity Hate:
|
185 |
+
Count: 2,774 (5.30%)
|
186 |
+
95% CI: [5.11%, 5.49%]
|
187 |
+
|
188 |
+
IT (Total: 52,340 comments)
|
189 |
+
- Toxic:
|
190 |
+
Count: 25,827 (49.34%)
|
191 |
+
95% CI: [48.92%, 49.77%]
|
192 |
+
- Severe Toxic:
|
193 |
+
Count: 2,429 (4.64%)
|
194 |
+
95% CI: [4.46%, 4.82%]
|
195 |
+
- Obscene:
|
196 |
+
Count: 12,341 (23.58%)
|
197 |
+
95% CI: [23.21%, 23.94%]
|
198 |
+
- Threat:
|
199 |
+
Count: 1,077 (2.06%)
|
200 |
+
95% CI: [1.94%, 2.18%]
|
201 |
+
- Insult:
|
202 |
+
Count: 15,118 (28.88%)
|
203 |
+
95% CI: [28.50%, 29.27%]
|
204 |
+
- Identity Hate:
|
205 |
+
Count: 2,782 (5.32%)
|
206 |
+
95% CI: [5.12%, 5.51%]
|
207 |
+
|
208 |
+
EN (Total: 46,478 comments)
|
209 |
+
- Toxic:
|
210 |
+
Count: 22,343 (48.07%)
|
211 |
+
95% CI: [47.62%, 48.53%]
|
212 |
+
- Severe Toxic:
|
213 |
+
Count: 1,986 (4.27%)
|
214 |
+
95% CI: [4.09%, 4.46%]
|
215 |
+
- Obscene:
|
216 |
+
Count: 12,356 (26.58%)
|
217 |
+
95% CI: [26.18%, 26.99%]
|
218 |
+
- Threat:
|
219 |
+
Count: 1,204 (2.59%)
|
220 |
+
95% CI: [2.45%, 2.73%]
|
221 |
+
- Insult:
|
222 |
+
Count: 11,475 (24.69%)
|
223 |
+
95% CI: [24.30%, 25.08%]
|
224 |
+
- Identity Hate:
|
225 |
+
Count: 2,143 (4.61%)
|
226 |
+
95% CI: [4.42%, 4.80%]
|
227 |
+
|
228 |
+
Statistical Analysis:
|
229 |
+
--------------------------------------------------
|
230 |
+
|
231 |
+
Chi-square test for number of toxic classes by language:
|
232 |
+
Chi-square statistic: 654.28
|
233 |
+
p-value: 0.0000000000
|
234 |
+
Significant at α=0.05: Yes
|
235 |
+
|
236 |
+
Chi-square test for Toxic:
|
237 |
+
Chi-square statistic: 26.10
|
238 |
+
p-value: 0.0002136602
|
239 |
+
Significant at α=0.05: Yes
|
240 |
+
|
241 |
+
Chi-square test for Severe Toxic:
|
242 |
+
Chi-square statistic: 12.38
|
243 |
+
p-value: 0.0540052211
|
244 |
+
Significant at α=0.05: No
|
245 |
+
|
246 |
+
Chi-square test for Obscene:
|
247 |
+
Chi-square statistic: 195.12
|
248 |
+
p-value: 0.0000000000
|
249 |
+
Significant at α=0.05: Yes
|
250 |
+
|
251 |
+
Chi-square test for Threat:
|
252 |
+
Chi-square statistic: 57.45
|
253 |
+
p-value: 0.0000000001
|
254 |
+
Significant at α=0.05: Yes
|
255 |
+
|
256 |
+
Chi-square test for Insult:
|
257 |
+
Chi-square statistic: 350.72
|
258 |
+
p-value: 0.0000000000
|
259 |
+
Significant at α=0.05: Yes
|
260 |
+
|
261 |
+
Chi-square test for Identity Hate:
|
262 |
+
Chi-square statistic: 42.77
|
263 |
+
p-value: 0.0000001295
|
264 |
+
Significant at α=0.05: Yes
|
analysis/analyze_lang_distribution.py
ADDED
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import seaborn as sns
|
4 |
+
import numpy as np
|
5 |
+
from scipy import stats
|
6 |
+
import os
|
7 |
+
|
8 |
+
def set_style():
|
9 |
+
"""Set the style for all plots"""
|
10 |
+
# Use a basic style instead of seaborn
|
11 |
+
plt.style.use('default')
|
12 |
+
|
13 |
+
# Custom style settings
|
14 |
+
plt.rcParams['figure.figsize'] = (12, 6)
|
15 |
+
plt.rcParams['font.size'] = 10
|
16 |
+
plt.rcParams['axes.titlesize'] = 14
|
17 |
+
plt.rcParams['axes.labelsize'] = 12
|
18 |
+
plt.rcParams['axes.grid'] = True
|
19 |
+
plt.rcParams['grid.alpha'] = 0.3
|
20 |
+
|
21 |
+
# Custom color palette
|
22 |
+
colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF99CC', '#99FFCC', '#FFB366']
|
23 |
+
return colors
|
24 |
+
|
25 |
+
def create_language_distribution_plot(df, lang_dist, lang_percent, colors, image_dir):
|
26 |
+
"""Create and save language distribution plot"""
|
27 |
+
plt.figure(figsize=(14, 8))
|
28 |
+
|
29 |
+
# Create bar positions
|
30 |
+
x = np.arange(len(lang_dist))
|
31 |
+
|
32 |
+
# Create bars with language names as x-ticks
|
33 |
+
bars = plt.bar(x, lang_dist.values, color=colors)
|
34 |
+
plt.title('Language Distribution in Multilingual Toxic Comment Dataset', pad=20)
|
35 |
+
plt.xlabel('Language', labelpad=10)
|
36 |
+
plt.ylabel('Number of Comments', labelpad=10)
|
37 |
+
|
38 |
+
# Set x-ticks to language names
|
39 |
+
plt.xticks(x, lang_dist.index, rotation=45)
|
40 |
+
|
41 |
+
# Add value labels on top of each bar with increased spacing
|
42 |
+
for i, bar in enumerate(bars):
|
43 |
+
height = bar.get_height()
|
44 |
+
plt.text(bar.get_x() + bar.get_width()/2., height + (max(lang_dist.values) * 0.01),
|
45 |
+
f'{int(height):,}\n({lang_percent.values[i]:.1f}%)',
|
46 |
+
ha='center', va='bottom', fontsize=10)
|
47 |
+
|
48 |
+
# Add some padding to the top of the plot
|
49 |
+
plt.margins(y=0.2)
|
50 |
+
|
51 |
+
plt.tight_layout()
|
52 |
+
plt.savefig(os.path.join(image_dir, 'language_distribution.png'), dpi=300, bbox_inches='tight')
|
53 |
+
plt.close()
|
54 |
+
|
55 |
+
def create_toxicity_heatmap(df, toxicity_cols, image_dir):
|
56 |
+
"""Create and save toxicity correlation heatmap"""
|
57 |
+
plt.figure(figsize=(12, 10))
|
58 |
+
|
59 |
+
# Calculate correlation and sort
|
60 |
+
correlation = df[toxicity_cols].corr()
|
61 |
+
|
62 |
+
# Sort correlation matrix by mean correlation value
|
63 |
+
mean_corr = correlation.mean()
|
64 |
+
sorted_cols = mean_corr.sort_values(ascending=False).index
|
65 |
+
correlation = correlation.loc[sorted_cols, sorted_cols]
|
66 |
+
|
67 |
+
# Create heatmap with better styling
|
68 |
+
im = plt.imshow(correlation, cmap='RdYlBu_r', aspect='equal', vmin=0, vmax=1)
|
69 |
+
plt.colorbar(im, label='Correlation Coefficient')
|
70 |
+
|
71 |
+
# Add text annotations with conditional formatting
|
72 |
+
for i in range(len(correlation)):
|
73 |
+
for j in range(len(correlation)):
|
74 |
+
corr_value = correlation.iloc[i, j]
|
75 |
+
# Choose text color based on background
|
76 |
+
text_color = 'white' if abs(corr_value) > 0.7 else 'black'
|
77 |
+
# Make diagonal elements bold
|
78 |
+
fontweight = 'bold' if i == j else 'normal'
|
79 |
+
plt.text(j, i, f'{corr_value:.2f}',
|
80 |
+
ha='center', va='center',
|
81 |
+
color=text_color,
|
82 |
+
fontweight=fontweight,
|
83 |
+
fontsize=10)
|
84 |
+
|
85 |
+
# Improve title and labels
|
86 |
+
plt.title('Correlation between Different Types of Toxicity\n(Sorted by Average Correlation)',
|
87 |
+
pad=20, fontsize=14)
|
88 |
+
|
89 |
+
# Format axis labels
|
90 |
+
formatted_labels = [col.replace('_', ' ').title() for col in correlation.columns]
|
91 |
+
plt.xticks(range(len(formatted_labels)), formatted_labels, rotation=45, ha='right')
|
92 |
+
plt.yticks(range(len(formatted_labels)), formatted_labels)
|
93 |
+
|
94 |
+
# Add gridlines
|
95 |
+
plt.grid(False)
|
96 |
+
|
97 |
+
# Adjust layout
|
98 |
+
plt.tight_layout()
|
99 |
+
plt.savefig(os.path.join(image_dir, 'toxicity_correlation.png'), dpi=300, bbox_inches='tight')
|
100 |
+
plt.close()
|
101 |
+
|
102 |
+
def create_toxicity_by_language_plot(df, lang_dist, toxicity_cols, colors, image_dir):
|
103 |
+
"""Create and save toxicity distribution by language plot"""
|
104 |
+
plt.figure(figsize=(15, 8))
|
105 |
+
|
106 |
+
x = np.arange(len(lang_dist.index))
|
107 |
+
width = 0.15
|
108 |
+
multiplier = 0
|
109 |
+
|
110 |
+
for attribute, color in zip(toxicity_cols, colors):
|
111 |
+
# Calculate percentage of toxic comments (any value > 0)
|
112 |
+
attribute_means = [(df[df['lang'] == lang][attribute] > 0).mean() * 100
|
113 |
+
for lang in lang_dist.index]
|
114 |
+
|
115 |
+
offset = width * multiplier
|
116 |
+
rects = plt.bar(x + offset, attribute_means, width,
|
117 |
+
label=attribute.replace('_', ' ').title(),
|
118 |
+
color=color, alpha=0.8)
|
119 |
+
|
120 |
+
# Add value labels on the bars
|
121 |
+
for rect in rects:
|
122 |
+
height = rect.get_height()
|
123 |
+
plt.text(rect.get_x() + rect.get_width()/2., height,
|
124 |
+
f'{height:.1f}%', ha='center', va='bottom', fontsize=8)
|
125 |
+
|
126 |
+
multiplier += 1
|
127 |
+
|
128 |
+
plt.xlabel('Language')
|
129 |
+
plt.ylabel('Percentage of Toxic Comments (%)')
|
130 |
+
plt.title('Distribution of Toxicity Types by Language')
|
131 |
+
plt.xticks(x + width * 2.5, lang_dist.index, rotation=45)
|
132 |
+
plt.legend(loc='upper right', bbox_to_anchor=(1, 1))
|
133 |
+
plt.grid(True, alpha=0.3)
|
134 |
+
|
135 |
+
plt.tight_layout()
|
136 |
+
plt.savefig(os.path.join(image_dir, 'toxicity_by_language.png'), dpi=300, bbox_inches='tight')
|
137 |
+
plt.close()
|
138 |
+
|
139 |
+
def create_class_distribution_plot(df, lang_dist, image_dir):
|
140 |
+
"""Create and save class distribution across languages plot"""
|
141 |
+
plt.figure(figsize=(16, 10))
|
142 |
+
|
143 |
+
# Define toxicity columns and their display names
|
144 |
+
toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
|
145 |
+
display_names = [col.replace('_', ' ').title() for col in toxicity_cols]
|
146 |
+
|
147 |
+
# Calculate class distribution for each language
|
148 |
+
class_dist = {}
|
149 |
+
non_toxic_dist = {} # Store non-toxic percentages
|
150 |
+
for lang in lang_dist.index:
|
151 |
+
lang_df = df[df['lang'] == lang]
|
152 |
+
total = len(lang_df)
|
153 |
+
|
154 |
+
# Create a binary matrix of toxicity flags
|
155 |
+
toxic_matrix = lang_df[toxicity_cols].astype(bool)
|
156 |
+
|
157 |
+
# Calculate non-toxic percentage (comments with no toxic flags)
|
158 |
+
non_toxic_mask = ~toxic_matrix.any(axis=1)
|
159 |
+
non_toxic_percent = (non_toxic_mask.sum() / total) * 100
|
160 |
+
non_toxic_dist[lang] = non_toxic_percent
|
161 |
+
|
162 |
+
# Calculate percentages for each toxicity type
|
163 |
+
class_dist[lang] = [(toxic_matrix[col].sum() / total) * 100 for col in toxicity_cols]
|
164 |
+
|
165 |
+
# Create stacked bar chart
|
166 |
+
x = np.arange(len(lang_dist.index))
|
167 |
+
|
168 |
+
# Use a color scheme with an additional color for non-toxic
|
169 |
+
colors = plt.cm.Set3(np.linspace(0, 1, len(toxicity_cols) + 1))
|
170 |
+
|
171 |
+
# First, plot non-toxic comments
|
172 |
+
non_toxic_values = [non_toxic_dist[lang] for lang in lang_dist.index]
|
173 |
+
non_toxic_bar = plt.bar(x, non_toxic_values, label='Non-Toxic', color=colors[0], alpha=0.9)
|
174 |
+
|
175 |
+
# Add percentage labels for non-toxic
|
176 |
+
for j, v in enumerate(non_toxic_values):
|
177 |
+
if v > 1: # Show all values above 1%
|
178 |
+
plt.text(x[j], v/2, f'{v:.1f}%',
|
179 |
+
ha='center', va='center',
|
180 |
+
color='black',
|
181 |
+
fontweight='bold',
|
182 |
+
fontsize=9)
|
183 |
+
|
184 |
+
# Initialize bottom array with non-toxic values
|
185 |
+
bottom = np.array(non_toxic_values)
|
186 |
+
|
187 |
+
# Then plot toxic categories
|
188 |
+
bars = [non_toxic_bar]
|
189 |
+
for i, (col, display_name) in enumerate(zip(toxicity_cols, display_names)):
|
190 |
+
values = [class_dist[lang][i] for lang in lang_dist.index]
|
191 |
+
bar = plt.bar(x, values, bottom=bottom, label=display_name, color=colors[i+1], alpha=0.9)
|
192 |
+
bars.append(bar)
|
193 |
+
|
194 |
+
# Add percentage labels for all values > 1%
|
195 |
+
for j, v in enumerate(values):
|
196 |
+
if v > 1: # Show all values above 1%
|
197 |
+
center = bottom[j] + v/2
|
198 |
+
text_color = 'black' if v > 10 else 'black'
|
199 |
+
plt.text(x[j], center, f'{v:.1f}%',
|
200 |
+
ha='center', va='center',
|
201 |
+
color=text_color,
|
202 |
+
fontweight='bold',
|
203 |
+
fontsize=9)
|
204 |
+
bottom = bottom + np.array(values) # Update bottom array correctly
|
205 |
+
|
206 |
+
plt.xlabel('Language', labelpad=10, fontsize=12)
|
207 |
+
plt.ylabel('Percentage of Comments', labelpad=10, fontsize=12)
|
208 |
+
plt.title('Distribution of Non-Toxic and Toxic Comments by Language', pad=20, fontsize=14)
|
209 |
+
plt.xticks(x, lang_dist.index, rotation=45, fontsize=10)
|
210 |
+
|
211 |
+
# Adjust legend
|
212 |
+
plt.legend(title='Comment Types',
|
213 |
+
bbox_to_anchor=(1.15, 1),
|
214 |
+
loc='upper left',
|
215 |
+
fontsize=10,
|
216 |
+
title_fontsize=12)
|
217 |
+
|
218 |
+
# Add grid for better readability
|
219 |
+
plt.grid(True, axis='y', alpha=0.3)
|
220 |
+
|
221 |
+
# Adjust layout to prevent label cutoff
|
222 |
+
plt.margins(y=0.1)
|
223 |
+
plt.tight_layout()
|
224 |
+
plt.savefig(os.path.join(image_dir, 'class_distribution.png'), dpi=300, bbox_inches='tight')
|
225 |
+
plt.close()
|
226 |
+
|
227 |
+
def analyze_language_distribution():
    """Analyze language distribution and toxicity patterns in the dataset.

    Reads the training split, prints per-language counts and per-class
    toxicity statistics (with 95% normal-approximation confidence
    intervals), renders the distribution/heatmap plots via the sibling
    plotting helpers, and runs chi-square independence tests between
    language and toxicity labels. All plots are written to ``images/``.
    """
    # Create images directory if it doesn't exist
    image_dir = 'images'
    os.makedirs(image_dir, exist_ok=True)

    # Set style and get color palette (shared helper defined in this module)
    colors = set_style()

    # Read the dataset
    print("Reading dataset...")
    input_file = 'dataset/split/train.csv'
    df = pd.read_csv(input_file)

    # Get language distribution (counts and percentages share one ordering)
    lang_dist = df['lang'].value_counts()
    lang_percent = df['lang'].value_counts(normalize=True) * 100

    # Print basic statistics
    print("\nDataset Overview:")
    print("-" * 50)
    print("Input file: ", input_file)
    print(f"Total number of comments: {len(df):,}")
    print(f"Number of languages: {df['lang'].nunique()}")

    print("\nLanguage Distribution:")
    print("-" * 50)
    for lang, count in lang_dist.items():
        print(f"{lang}: {count:,} comments ({lang_percent[lang]:.2f}%)")

    # Create language distribution plot
    create_language_distribution_plot(df, lang_dist, lang_percent, colors, image_dir)

    # Analyze toxicity across the six label columns
    toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Create correlation heatmap
    create_toxicity_heatmap(df, toxicity_cols, image_dir)

    # Create toxicity by language plot
    create_toxicity_by_language_plot(df, lang_dist, toxicity_cols, colors, image_dir)

    # Create class distribution plot
    create_class_distribution_plot(df, lang_dist, image_dir)

    # Print class distribution statistics
    print("\nClass Distribution by Language:")
    print("-" * 50)

    for lang in lang_dist.index:
        lang_df = df[df['lang'] == lang]
        total = len(lang_df)

        print(f"\n{lang.upper()} (Total: {total:,} comments)")

        # Count comments by number of toxic classes
        # (astype(bool) treats any non-zero label value as positive)
        toxic_counts = lang_df[toxicity_cols].astype(bool).sum(axis=1)
        class_dist = toxic_counts.value_counts().sort_index()

        for n_classes, count in class_dist.items():
            percentage = (count / total) * 100
            print(f"{n_classes} toxic classes: {count:,} ({percentage:.2f}%)")

    # Detailed toxicity analysis by language
    print("\nDetailed Toxicity Analysis by Language:")
    print("-" * 50)

    for lang in lang_dist.index:
        lang_df = df[df['lang'] == lang]
        print(f"\n{lang.upper()} (Total: {len(lang_df):,} comments)")

        # Calculate toxicity statistics
        for col in toxicity_cols:
            toxic_count = (lang_df[col] > 0).sum()
            toxic_percent = (toxic_count / len(lang_df)) * 100

            # Calculate confidence interval
            # NOTE(review): normal-approximation (Wald) interval; degenerate
            # when toxic_percent is 0% or 100% (scale becomes 0) — confirm
            # acceptable for rare classes such as 'threat'.
            ci = stats.norm.interval(0.95,
                                   loc=toxic_percent/100,
                                   scale=np.sqrt((toxic_percent/100 * (1-toxic_percent/100)) / len(lang_df)))
            ci_lower, ci_upper = ci[0] * 100, ci[1] * 100

            print(f"- {col.replace('_', ' ').title()}:")
            print(f"  Count: {toxic_count:,} ({toxic_percent:.2f}%)")
            print(f"  95% CI: [{ci_lower:.2f}%, {ci_upper:.2f}%]")

    # Statistical tests
    print("\nStatistical Analysis:")
    print("-" * 50)

    # Chi-square test for independence between language and number of toxic classes
    toxic_class_counts = pd.crosstab(df['lang'], df[toxicity_cols].astype(bool).sum(axis=1))
    chi2, p_value, _, _ = stats.chi2_contingency(toxic_class_counts)
    print("\nChi-square test for number of toxic classes by language:")
    print(f"Chi-square statistic: {chi2:.2f}")
    print(f"p-value: {p_value:.10f}")
    print(f"Significant at α=0.05: {'Yes' if p_value < 0.05 else 'No'}")

    # Chi-square test for each toxicity type (binary presence vs language)
    for col in toxicity_cols:
        binary_col = (df[col] > 0).astype(int)
        contingency_table = pd.crosstab(df['lang'], binary_col)
        chi2, p_value, _, _ = stats.chi2_contingency(contingency_table)
        print(f"\nChi-square test for {col.replace('_', ' ').title()}:")
        print(f"Chi-square statistic: {chi2:.2f}")
        print(f"p-value: {p_value:.10f}")
        print(f"Significant at α=0.05: {'Yes' if p_value < 0.05 else 'No'}")

if __name__ == "__main__":
    analyze_language_distribution()
|
analysis/compute_class_weights.py
ADDED
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import json
|
4 |
+
from typing import Dict, List
|
5 |
+
import logging
|
6 |
+
|
7 |
+
# Configure logging
# Root-logger setup for this script: INFO level, timestamped messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
|
12 |
+
|
13 |
+
def validate_parameters(params: Dict) -> Dict:
    """
    Validate weight calculation parameters to prevent dangerous combinations.
    Includes validation for focal loss parameters.

    Hard limits raise ValueError; risky-but-legal combinations only emit
    a warning. The (unmodified) params dict is returned for chaining.
    """
    gamma = params['gamma']
    alpha = params['alpha']
    boost = params['boost_factor']

    # Hard limit: combined scaling of boost and cap must stay under 30x.
    scaling = boost * params['max_weight']
    if scaling > 30:
        raise ValueError(f"Dangerous weight scaling detected: boost_factor * max_weight = {scaling}")

    # Focal-loss parameters must lie inside their supported ranges.
    if not 0 < gamma <= 5.0:
        raise ValueError(f"Invalid gamma value: {gamma}. Must be in (0, 5.0]")
    if not 0 < alpha < 1:
        raise ValueError(f"Invalid alpha value: {alpha}. Must be in (0, 1)")

    # Soft checks: combinations that may destabilize training get a warning.
    if gamma > 3.0 and boost > 1.5:
        logging.warning(f"Potentially unstable combination: high gamma ({gamma}) with high boost factor ({boost})")
    if alpha > 0.4 and boost > 1.5:
        logging.warning(f"Potentially unstable combination: high alpha ({alpha}) with high boost factor ({boost})")

    return params
|
37 |
+
|
38 |
+
def calculate_safe_weights(
    support_0: int,
    support_1: int,
    max_weight: float = 15.0,
    min_weight: float = 0.5,
    gamma: float = 2.0,
    alpha: float = 0.25,
    boost_factor: float = 1.0,
    num_classes: int = 6,
    lang: str = None,
    toxicity_type: str = None
) -> Dict[str, float]:
    """
    Calculate class weights with focal loss and adaptive scaling.

    Uses focal loss components for better handling of imbalanced classes
    while preserving language-specific adjustments.

    Args:
        support_0: Number of negative samples
        support_1: Number of positive samples
        max_weight: Maximum allowed weight
        min_weight: Minimum allowed weight
        gamma: Focal loss gamma parameter for down-weighting easy examples
        alpha: Focal loss alpha parameter for balancing positive/negative classes
        boost_factor: Optional boost for specific classes
        num_classes: Number of toxicity classes (default=6). Currently unused
            by the calculation; kept for interface compatibility.
        lang: Language code for language-specific constraints
        toxicity_type: Type of toxicity for class-specific constraints

    Returns:
        Dict with keys "0"/"1" (final clamped weights), the support counts,
        the raw (unclamped) positive weight, and a "calculation_metadata"
        block describing how the weight was derived.

    Raises:
        ValueError: If either sample count is negative.
    """
    # Input validation with detailed error messages
    if support_0 < 0 or support_1 < 0:
        raise ValueError(f"Negative sample counts: support_0={support_0}, support_1={support_1}")

    eps = 1e-7  # Small epsilon for numerical stability
    total = support_0 + support_1 + eps

    # Handle empty dataset case: neutral weights for both classes.
    if total <= eps:
        logging.warning(f"Empty dataset for {toxicity_type} in {lang}")
        return {
            "0": 1.0,
            "1": 1.0,
            "support_0": support_0,
            "support_1": support_1,
            "raw_weight_1": 1.0,
            "calculation_metadata": {
                "formula": "default_weights_empty_dataset",
                "constraints_applied": ["empty_dataset_fallback"]
            }
        }

    # Handle zero support cases safely: without positives the focal formula
    # is undefined, so fall back to the maximum allowed weight.
    if support_1 == 0:
        logging.warning(f"No positive samples for {toxicity_type} in {lang}")
        return {
            "0": 1.0,
            "1": max_weight,
            "support_0": support_0,
            "support_1": support_1,
            "raw_weight_1": max_weight,
            "calculation_metadata": {
                "formula": "max_weight_no_positives",
                "constraints_applied": ["no_positives_fallback"]
            }
        }

    # Determine effective maximum weight based on class and language
    if lang == 'en' and toxicity_type == 'threat':
        effective_max = min(max_weight, 15.0)  # Absolute cap for EN threat
    elif toxicity_type == 'identity_hate':
        effective_max = min(max_weight, 10.0)  # Cap for identity hate
    else:
        effective_max = max_weight

    # BUGFIX: pre-initialize the focal components. Previously, if the try
    # block below raised before assigning pt / modulating_factor /
    # balanced_alpha, the metadata dict at the end raised NameError on the
    # error path instead of returning the fallback weight.
    pt = 0.0
    modulating_factor = 0.0
    balanced_alpha = 0.0
    raw_weight_1 = effective_max

    try:
        # Calculate class frequencies
        freq_1 = support_1 / total
        freq_0 = support_0 / total

        # Focal loss components
        pt = freq_1 + eps  # Probability of target class
        modulating_factor = (1 - pt) ** gamma
        balanced_alpha = alpha / (alpha + (1 - alpha) * (1 - pt))

        # Base weight calculation with focal loss
        raw_weight_1 = balanced_alpha * modulating_factor / (pt + eps)

        # Apply adaptive scaling for severe (rare) classes: grows with the
        # gap between total size and positive support.
        if toxicity_type in ['threat', 'identity_hate']:
            severity_factor = (1 + np.log1p(total) / np.log1p(support_1)) / 2
            raw_weight_1 *= severity_factor

        # Apply boost factor
        raw_weight_1 *= boost_factor

        # Detect potential numerical instability (inf/nan from extreme inputs)
        if not np.isfinite(raw_weight_1):
            logging.error(f"Numerical instability detected for {toxicity_type} in {lang}")
            raw_weight_1 = effective_max

    except Exception as e:
        # Best-effort fallback: log and use the capped maximum weight.
        logging.error(f"Weight calculation error: {str(e)}")
        raw_weight_1 = effective_max

    # Apply safety limits with effective maximum
    weight_1 = min(effective_max, max(min_weight, raw_weight_1))
    weight_0 = 1.0  # Reference weight for majority class

    # Round weights for consistency and to prevent floating point issues
    weight_1 = round(float(weight_1), 3)
    weight_0 = round(float(weight_0), 3)

    return {
        "0": weight_0,
        "1": weight_1,
        "support_0": support_0,
        "support_1": support_1,
        "raw_weight_1": round(float(raw_weight_1), 3),
        "calculation_metadata": {
            "formula": "focal_loss_with_adaptive_scaling",
            "gamma": round(float(gamma), 3),
            "alpha": round(float(alpha), 3),
            "final_pt": round(float(pt), 4),
            "effective_max": round(float(effective_max), 3),
            "modulating_factor": round(float(modulating_factor), 4),
            "balanced_alpha": round(float(balanced_alpha), 4),
            "severity_adjusted": toxicity_type in ['threat', 'identity_hate'],
            "boost_factor": round(float(boost_factor), 3),
            "constraints_applied": [
                f"max_weight={effective_max}",
                f"boost={boost_factor}",
                f"numerical_stability=enforced",
                f"adaptive_scaling={'enabled' if toxicity_type in ['threat', 'identity_hate'] else 'disabled'}"
            ]
        }
    }
|
174 |
+
|
175 |
+
def get_language_specific_params(lang: str, toxicity_type: str) -> Dict:
    """
    Get language and class specific parameters for weight calculation.
    Includes focal loss parameters and their adjustments per language/class.

    Starts from global defaults, overlays any override registered for the
    (language, toxicity class) pair, and validates the merged result.
    """
    # Global defaults applied to every language/class combination.
    defaults = {
        "max_weight": 15.0,
        "min_weight": 0.5,
        "boost_factor": 1.0,
        "gamma": 2.0,   # Default focal loss gamma
        "alpha": 0.25   # Default focal loss alpha
    }

    # Per-language, per-class overrides based on distribution analysis.
    overrides = {
        "en": {
            "toxic": {
                "boost_factor": 1.67,  # To achieve ~3.5x weight
                "gamma": 2.5           # More focus on hard examples for main class
            },
            "threat": {
                "max_weight": 15.0,  # Absolute maximum cap
                "gamma": 3.0,        # Higher gamma for severe class
                "alpha": 0.3         # Slightly higher alpha for better recall
            },
            "identity_hate": {
                "max_weight": 5.0,  # Reduced from 8.4
                "gamma": 3.0,       # Higher gamma for severe class
                "alpha": 0.3        # Slightly higher alpha for better recall
            },
            "severe_toxic": {
                "max_weight": 3.9,  # Corrected weight
                "gamma": 2.5        # Moderate gamma for balance
            }
        },
        "tr": {
            "threat": {
                "max_weight": 12.8,  # Aligned with cross-lingual ratio
                "gamma": 2.8         # Slightly lower than EN for stability
            },
            "identity_hate": {
                "max_weight": 6.2,  # Adjusted for balance
                "gamma": 2.8        # Slightly lower than EN for stability
            }
        },
        "ru": {
            "threat": {
                "max_weight": 12.8,  # Aligned with cross-lingual ratio
                "gamma": 2.8         # Slightly lower than EN for stability
            },
            "identity_hate": {
                "max_weight": 7.0,  # Adjusted for balance
                "gamma": 2.8        # Slightly lower than EN for stability
            }
        },
        "fr": {
            "toxic": {
                "boost_factor": 1.2,  # To achieve ~2.2x weight
                "gamma": 2.2          # Lower gamma for better stability
            }
        }
    }

    # Merge: defaults first, then any class-specific override for this lang.
    merged = dict(defaults)
    merged.update(overrides.get(lang, {}).get(toxicity_type, {}))

    return validate_parameters(merged)
|
245 |
+
|
246 |
+
def check_cross_language_consistency(lang_weights: Dict) -> List[str]:
    """
    Check for consistency of weights across languages.
    Returns a list of warnings for significant disparities.

    Compares each language's positive-class weight for the severe classes
    ('threat', 'identity_hate') against the English baseline; a ratio
    above 1.5x or below 0.67x is flagged.

    Args:
        lang_weights: Mapping of language -> class -> weight entry
            (each entry has a '1' key with the positive-class weight).

    Returns:
        List of human-readable warning strings (also logged).
    """
    warnings = []

    # BUGFIX: previously lang_weights['en'] raised KeyError when no
    # English weights were present; skip the check instead.
    baseline = lang_weights.get('en')
    if baseline is None:
        logging.warning("No 'en' weights available; skipping cross-language consistency check")
        return warnings

    for lang in lang_weights:
        if lang == 'en':
            continue

        for cls in ['threat', 'identity_hate']:
            if cls in lang_weights[lang] and cls in baseline:
                base_weight = baseline[cls]['1']
                if base_weight == 0:
                    # Guard against division by zero on a degenerate baseline.
                    continue
                ratio = lang_weights[lang][cls]['1'] / base_weight
                if ratio > 1.5 or ratio < 0.67:
                    warning = f"Large {cls} weight disparity: {lang} vs en ({ratio:.2f}x)"
                    warnings.append(warning)
                    logging.warning(warning)

    return warnings
|
267 |
+
|
268 |
+
def validate_dataset_balance(df: pd.DataFrame) -> bool:
    """
    Validate dataset balance across languages.
    Returns False if imbalance exceeds threshold.

    Imbalance is measured as the coefficient of variation (std / mean)
    of per-language sample counts, with a 15% acceptance threshold.
    """
    counts = df.groupby('lang').size()
    variation = counts.std() / counts.mean()

    if variation > 0.15:  # 15% threshold for coefficient of variation
        logging.error(f"Dataset language imbalance exceeds 15% (CV={variation:.2%})")
        total = len(df)
        for language, n in counts.items():
            logging.warning(f"{language}: {n:,} samples ({n/total:.1%})")
        return False
    return True
|
282 |
+
|
283 |
+
def validate_weights(lang_weights: Dict) -> List[str]:
    """
    Ensure weights meet multilingual safety criteria.
    Validates weight ratios and focal loss parameters across languages.

    Args:
        lang_weights: Dictionary of weights per language and class

    Returns:
        List of validation warnings

    Raises:
        ValueError: If weights violate safety constraints
    """
    warnings = []

    for lang, classes in lang_weights.items():
        for cls, entry in classes.items():
            w1 = entry['1']
            w0 = entry['0']
            ratio = w1 / w0

            # Hard cap on the positive/negative weight ratio; warn first.
            if ratio > 30:
                raise ValueError(
                    f"Dangerous weight ratio {ratio:.1f}x for {lang} {cls}. "
                    f"Weight_1={w1:.3f}, Weight_0={w0:.3f}"
                )
            if ratio > 20:
                warnings.append(
                    f"High weight ratio {ratio:.1f}x for {lang} {cls}"
                )

            # Focal parameter boundaries, read from the recorded metadata.
            meta = entry['calculation_metadata']
            gamma = meta.get('gamma', 0.0)
            alpha = meta.get('alpha', 0.0)

            if gamma > 5.0:
                raise ValueError(
                    f"Unsafe gamma={gamma:.1f} for {lang} {cls}. "
                    f"Must be <= 5.0"
                )
            if gamma > 4.0:
                warnings.append(
                    f"High gamma={gamma:.1f} for {lang} {cls}"
                )

            if alpha > 0.9:
                raise ValueError(
                    f"Unsafe alpha={alpha:.2f} for {lang} {cls}. "
                    f"Must be < 0.9"
                )
            if alpha > 0.7:
                warnings.append(
                    f"High alpha={alpha:.2f} for {lang} {cls}"
                )

            # Combined risk: aggressive focusing stacked on a large ratio.
            if gamma > 3.0 and ratio > 15:
                warnings.append(
                    f"Risky combination for {lang} {cls}: "
                    f"gamma={gamma:.1f}, ratio={ratio:.1f}x"
                )

    return warnings
|
349 |
+
|
350 |
+
def compute_language_weights(df: pd.DataFrame) -> Dict:
    """
    Compute weights with inter-language normalization to ensure consistent
    weighting across languages while preserving relative class relationships.

    Two-pass algorithm:
      1. Per (language, class), compute focal-loss weights from the label
         supports via calculate_safe_weights / get_language_specific_params.
      2. Per class, rescale every language's positive weight so the maximum
         across languages becomes 15.0 (relative ratios preserved).

    Finishes by running validate_weights and
    check_cross_language_consistency and logging any warnings.

    Args:
        df: Dataset with a 'lang' column and binary toxicity label columns.

    Returns:
        Mapping language -> class -> weight entry (as produced by
        calculate_safe_weights, with normalization metadata added).
    """
    # Validate dataset balance first (warn-only; does not abort)
    if not validate_dataset_balance(df):
        logging.warning("Proceeding with imbalanced dataset - weights may need manual adjustment")

    lang_weights = {}
    toxicity_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # First pass: calculate raw weights for each language and class
    logging.info("\nFirst pass: Calculating raw weights")
    for lang in df['lang'].unique():
        logging.info(f"\nProcessing language: {lang}")
        lang_df = df[df['lang'] == lang]
        lang_weights[lang] = {}

        for col in toxicity_columns:
            y = lang_df[col].values.astype(np.int32)
            support_0 = int((y == 0).sum())
            support_1 = int((y == 1).sum())

            params = get_language_specific_params(lang, col)
            weights = calculate_safe_weights(
                support_0=support_0,
                support_1=support_1,
                max_weight=params['max_weight'],
                min_weight=params['min_weight'],
                gamma=params['gamma'],
                alpha=params['alpha'],
                boost_factor=params['boost_factor'],
                lang=lang,
                toxicity_type=col
            )
            lang_weights[lang][col] = weights

            # Log initial weights
            logging.info(f"  {col} - Initial weights:")
            logging.info(f"    Class 0: {weights['0']:.3f}, samples: {support_0:,}")
            logging.info(f"    Class 1: {weights['1']:.3f}, samples: {support_1:,}")

    # Second pass: normalize weights across languages
    # NOTE(review): rescaling to a fixed 15.0 ceiling can push a class
    # above its language-specific max_weight cap from the first pass
    # (e.g. identity_hate capped at 5.0) — confirm this is intended.
    logging.info("\nSecond pass: Normalizing weights across languages")
    for col in toxicity_columns:
        # Find maximum weight for this toxicity type across all languages
        max_weight = max(
            lang_weights[lang][col]['1']
            for lang in lang_weights
        )

        if max_weight > 0:  # Prevent division by zero
            logging.info(f"\nNormalizing {col}:")
            logging.info(f"  Maximum weight across languages: {max_weight:.3f}")

            # Normalize weights for each language
            for lang in lang_weights:
                original_weight = lang_weights[lang][col]['1']

                # Normalize and rescale
                normalized_weight = (original_weight / max_weight) * 15.0

                # Update weight while preserving metadata
                lang_weights[lang][col]['raw_weight_1'] = original_weight
                lang_weights[lang][col]['1'] = round(normalized_weight, 3)

                # Add normalization info to metadata
                lang_weights[lang][col]['calculation_metadata'].update({
                    'normalization': {
                        'original_weight': round(float(original_weight), 3),
                        'max_weight_across_langs': round(float(max_weight), 3),
                        'normalization_factor': round(float(15.0 / max_weight), 3)
                    }
                })

                # Log normalization results
                logging.info(f"  {lang}: {original_weight:.3f} → {normalized_weight:.3f}")

    # Validate final weights (log the per-class range across languages)
    logging.info("\nValidating final weights:")
    for col in toxicity_columns:
        weights_range = [
            lang_weights[lang][col]['1']
            for lang in lang_weights
        ]
        logging.info(f"  {col}: range [{min(weights_range):.3f}, {max(weights_range):.3f}]")

    # Validate weights meet safety criteria (may raise ValueError)
    validation_warnings = validate_weights(lang_weights)
    if validation_warnings:
        logging.warning("\nWeight validation warnings:")
        for warning in validation_warnings:
            logging.warning(f"  {warning}")

    # Check cross-language consistency
    consistency_warnings = check_cross_language_consistency(lang_weights)
    if consistency_warnings:
        logging.warning("\nCross-language consistency warnings:")
        for warning in consistency_warnings:
            logging.warning(f"  {warning}")

    return lang_weights
|
453 |
+
|
454 |
+
def main():
    """Compute per-language class weights and save them to JSON.

    Loads the augmented multilingual dataset, runs
    compute_language_weights, wraps the result with dataset/method
    metadata, and writes it to weights/language_class_weights.json.
    """
    # Load dataset
    input_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_AUGMENTED.csv'
    logging.info(f"Loading dataset from {input_file}")
    df = pd.read_csv(input_file)

    # Compute weights
    lang_weights = compute_language_weights(df)

    # Add metadata describing how the weights were produced
    weights_data = {
        "metadata": {
            "total_samples": len(df),
            "language_distribution": df['lang'].value_counts().to_dict(),
            "weight_calculation": {
                "method": "focal_loss_with_adaptive_scaling",
                "parameters": {
                    "default_max_weight": 15.0,
                    "default_min_weight": 0.5,
                    "language_specific_adjustments": True
                }
            }
        },
        "weights": lang_weights
    }

    # Save weights (ensure_ascii=False keeps non-ASCII language data readable)
    output_file = 'weights/language_class_weights.json'
    logging.info(f"\nSaving weights to {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(weights_data, f, indent=2, ensure_ascii=False)

    logging.info("\nWeight calculation complete!")

    # Print summary statistics: only severe classes whose weight changed
    # during cross-language normalization
    logging.info("\nSummary of adjustments made:")
    for lang in lang_weights:
        for col in ['threat', 'identity_hate']:
            if col in lang_weights[lang]:
                weight = lang_weights[lang][col]['1']
                raw = lang_weights[lang][col]['raw_weight_1']
                if raw != weight:
                    logging.info(f"{lang} {col}: Adjusted from {raw:.2f}× to {weight:.2f}×")

if __name__ == "__main__":
    main()
|
analysis/plot_loss_curves.py
ADDED
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import torch
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import numpy as np
|
5 |
+
from datetime import datetime
|
6 |
+
import logging
|
7 |
+
from pathlib import Path
|
8 |
+
from torch.utils.data import DataLoader
|
9 |
+
import sys
|
10 |
+
import os
|
11 |
+
import wandb
|
12 |
+
from transformers import get_linear_schedule_with_warmup
|
13 |
+
|
14 |
+
# Add project root to path
|
15 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
16 |
+
|
17 |
+
from model.training_config import TrainingConfig
|
18 |
+
from model.language_aware_transformer import LanguageAwareTransformer
|
19 |
+
from model.train import ToxicDataset
|
20 |
+
from transformers import XLMRobertaTokenizer
|
21 |
+
|
22 |
+
# Set up logging
# Root-logger setup; module logger used throughout this script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
28 |
+
|
29 |
+
def setup_plot_style():
    """Configure plot styling.

    Applies a seaborn-like dark-grid matplotlib style and sets the
    default figure size and font size for all subsequent plots.
    """
    # BUGFIX: 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in
    # matplotlib 3.6 and the old alias was later removed, so the original
    # plt.style.use('seaborn-darkgrid') raises on current matplotlib.
    # Try the modern name first and fall back for older installations.
    for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    plt.rcParams['figure.figsize'] = (12, 12)
    plt.rcParams['font.size'] = 12
|
34 |
+
|
35 |
+
def setup_wandb():
    """Initialize wandb for validation tracking.

    Starts a wandb run in the toxic-comment-classification project with a
    timestamped run name; logs and re-raises on failure.
    """
    try:
        run_config = {
            "analysis_type": "validation_loss",
            "timestamp": datetime.now().strftime('%Y%m%d-%H%M%S')
        }
        wandb.init(
            project="toxic-comment-classification",
            name=f"validation-analysis-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
            config=run_config
        )
        logger.info("Initialized wandb logging")
    except Exception as e:
        logger.error(f"Error initializing wandb: {str(e)}")
        raise
|
50 |
+
|
51 |
+
def load_model_and_data():
    """Load the model and validation data.

    Builds the training configuration, loads and concatenates the
    validation and test splits into one dataset/dataloader, restores the
    model from its checkpoint, and constructs the optimizer, LR scheduler
    and AMP gradient scaler used by the validation loop.

    Returns:
        Tuple of (model, combined_loader, device, optimizer, scheduler,
        scaler, config).

    Raises:
        FileNotFoundError: If the model checkpoint file is missing.
    """
    try:
        # Initialize config with training settings
        config = TrainingConfig(
            batch_size=16,
            num_workers=16,
            lr=2e-5,
            weight_decay=0.01,
            max_grad_norm=1.0,
            warmup_ratio=0.1,
            label_smoothing=0.01,

            mixed_precision="fp16",
            activation_checkpointing=True,
            epochs=1  # Number of validation epochs

        )

        # Load validation data (val + test combined for this analysis run)
        logger.info("Loading validation and test data...")
        val_df = pd.read_csv("dataset/split/val.csv")
        test_df = pd.read_csv("dataset/split/test.csv")
        combined_df = pd.concat([val_df, test_df])
        tokenizer = XLMRobertaTokenizer.from_pretrained(config.model_name)
        combined_dataset = ToxicDataset(combined_df, tokenizer, config, mode='combined')


        # Create combined dataloader
        combined_loader = DataLoader(
            combined_dataset,
            batch_size=config.batch_size,
            shuffle=True,  # Enable shuffling
            num_workers=config.num_workers,
            pin_memory=True,
            drop_last=False  # Keep all samples
        )

        # Log dataloader config to wandb (only when a run is active)
        if wandb.run is not None:
            wandb.config.update({
                'shuffle': True,
                'drop_last': False,
                'total_validation_steps': len(combined_loader),
                'total_validation_samples': len(combined_dataset)
            })


        # Load model
        logger.info("Loading model...")
        model = LanguageAwareTransformer(
            num_labels=len(config.toxicity_labels),
            model_name=config.model_name
        )

        # Load latest checkpoint (state dict saved by the training script)
        checkpoint_path = Path('weights/toxic_classifier_xlm-roberta-large/pytorch_model.bin')
        if checkpoint_path.exists():
            checkpoint = torch.load(checkpoint_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            logger.info("Loaded model checkpoint")
        else:
            raise FileNotFoundError("No checkpoint found")

        # Move model to GPU if available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)

        # Setup optimizer (param groups come from the training config)
        param_groups = config.get_param_groups(model)
        optimizer = torch.optim.AdamW(param_groups)

        # Setup scheduler: linear warmup then linear decay over all steps
        total_steps = len(combined_loader) * config.epochs
        warmup_steps = int(total_steps * config.warmup_ratio)

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

        # Initialize gradient scaler for mixed precision
        # NOTE(review): torch.cuda.amp.GradScaler is deprecated in newer
        # torch in favor of torch.amp.GradScaler('cuda', ...) — confirm
        # the pinned torch version before changing.
        scaler = torch.cuda.amp.GradScaler(enabled=config.mixed_precision == "fp16")

        # Log model configuration to wandb
        if wandb.run is not None:
            wandb.config.update({
                'model_name': config.model_name,
                'batch_size': config.batch_size,
                'learning_rate': config.lr,
                'weight_decay': config.weight_decay,
                'max_grad_norm': config.max_grad_norm,
                'warmup_ratio': config.warmup_ratio,
                'label_smoothing': config.label_smoothing,
                'mixed_precision': config.mixed_precision,
                'num_workers': config.num_workers,
                'activation_checkpointing': config.activation_checkpointing,
                'validation_epochs': config.epochs
            })

        return model, combined_loader, device, optimizer, scheduler, scaler, config


    except Exception as e:
        logger.error(f"Error loading model and data: {str(e)}")
        raise
|
158 |
+
|
159 |
+
def collect_validation_losses(model, combined_loader, device, optimizer, scheduler, scaler, config):
    """Run validation and collect step losses across multiple epochs.

    Iterates ``config.epochs`` times over ``combined_loader`` with the model
    frozen in eval mode, logging per-step and per-epoch losses to wandb.

    Args:
        model: Model whose forward pass returns a mapping containing 'loss'.
        combined_loader: DataLoader over the combined val+test split.
        device: Device the tensor entries of each batch are moved to.
        optimizer: Only read here, to report current learning rates.
        scheduler: Unused in this function; accepted to mirror the tuple
            returned by load_model_and_data().
        scaler: Unused in this function; autocast alone handles precision.
        config: Run configuration (epochs, batch_size, mixed_precision, ...).

    Returns:
        list[dict]: One entry per epoch with keys 'epoch', 'avg_loss' and
        'elapsed_time' (seconds).

    Raises:
        Exception: Re-raised after logging if any step fails.
    """
    try:
        logger.warning("This is an analysis run on combined val+test data - model will not be saved or updated")
        # Ensure we're in eval mode and no gradients are computed.
        # NOTE(review): requires_grad is flipped off permanently; acceptable
        # for an analysis-only run, but the model cannot be trained afterwards
        # without re-enabling gradients.
        model.eval()
        for param in model.parameters():
            param.requires_grad = False

        all_losses = []  # NOTE(review): never appended to; kept as-is
        epoch_losses = []

        for epoch in range(config.epochs):
            logger.info(f"\nStarting validation epoch {epoch+1}/{config.epochs}")
            total_loss = 0
            num_batches = len(combined_loader)
            epoch_start_time = datetime.now()

            with torch.no_grad():  # Extra safety to ensure no gradients
                for step, batch in enumerate(combined_loader):
                    # Move batch to device; non-tensor entries pass through untouched
                    batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                             for k, v in batch.items()}

                    # Forward pass with mixed precision (disabled when
                    # config.mixed_precision == "no")
                    with torch.cuda.amp.autocast(enabled=config.mixed_precision != "no"):
                        outputs = model(**batch)
                        loss = outputs['loss'].item()

                    total_loss += loss

                    # Calculate running averages
                    avg_loss = total_loss / (step + 1)

                    # Get learning rates (one per optimizer param group)
                    lrs = [group['lr'] for group in optimizer.param_groups]

                    # Log to wandb every step
                    wandb.log({
                        'val/step_loss': loss,
                        'val/running_avg_loss': avg_loss,
                        'val/progress': (step + 1) / num_batches * 100,
                        'val/learning_rate': lrs[0],  # Base learning rate
                        'val/batch_size': config.batch_size,
                        'val/epoch': epoch + 1,
                        'val/global_step': epoch * num_batches + step
                    })

                    # Log console progress every 10 steps with a throughput-based ETA
                    if step % 10 == 0:
                        elapsed_time = datetime.now() - epoch_start_time
                        steps_per_sec = (step + 1) / elapsed_time.total_seconds()
                        remaining_steps = num_batches - (step + 1)
                        eta_seconds = remaining_steps / steps_per_sec if steps_per_sec > 0 else 0

                        logger.info(
                            f"Epoch [{epoch+1}/{config.epochs}] "
                            f"Step [{step+1}/{num_batches}] "
                            f"Loss: {loss:.4f} "
                            f"Avg Loss: {avg_loss:.4f} "
                            f"LR: {lrs[0]:.2e} "
                            f"ETA: {int(eta_seconds)}s"
                        )

            # Calculate epoch statistics
            epoch_avg_loss = total_loss / num_batches
            epoch_losses.append({
                'epoch': epoch + 1,
                'avg_loss': epoch_avg_loss,
                'elapsed_time': (datetime.now() - epoch_start_time).total_seconds()
            })

            # Log epoch metrics to wandb
            wandb.log({
                'val/epoch_avg_loss': epoch_avg_loss,
                'val/epoch_number': epoch + 1,
                'val/epoch_time': epoch_losses[-1]['elapsed_time']
            })

            # Clear GPU memory after each epoch
            torch.cuda.empty_cache()

        return epoch_losses

    except Exception as e:
        logger.error(f"Error collecting validation losses: {str(e)}")
        raise
|
246 |
+
|
247 |
+
def plot_validation_losses(epoch_losses):
    """Plot validation epoch losses.

    Draws the per-epoch average loss with a linear trend line, saves the
    figure under analysis/plots/ with a timestamped filename, logs the image
    to wandb, and shows the plot.

    Args:
        epoch_losses: List of dicts with at least 'epoch' and 'avg_loss'
            keys, as produced by collect_validation_losses().

    Raises:
        Exception: Re-raised after logging if plotting or saving fails.
    """
    try:
        setup_plot_style()

        # Create figure
        fig, ax = plt.subplots()

        # Extract data
        epochs = [d['epoch'] for d in epoch_losses]
        losses = [d['avg_loss'] for d in epoch_losses]

        # Plot epoch losses
        ax.plot(epochs, losses, 'go-', label='Epoch Average Loss', linewidth=2, markersize=8)

        # Add trend line. A degree-1 np.polyfit needs at least two points;
        # with a single epoch it is ill-conditioned, so only draw the trend
        # when it is meaningful.
        if len(epochs) >= 2:
            z = np.polyfit(epochs, losses, 1)
            p = np.poly1d(z)
            ax.plot(epochs, p(epochs), "r--", alpha=0.8, label='Loss Trend')

        # Customize plot
        ax.set_title('Validation Epoch Losses')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Average Loss')
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)

        # Adjust layout
        plt.tight_layout()

        # Create output directory if it doesn't exist
        output_dir = Path('analysis/plots')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save plot; timestamped name keeps successive runs from overwriting
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_path = output_dir / f'validation_losses_{timestamp}.png'
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        logger.info(f"Plot saved to {output_path}")

        # Log plot to wandb
        wandb.log({
            "val/loss_plot": wandb.Image(str(output_path))
        })

        # Show plot
        plt.show()

    except Exception as e:
        logger.error(f"Error plotting validation losses: {str(e)}")
        raise
|
298 |
+
|
299 |
+
def calculate_loss_statistics(epoch_losses):
    """Summarize per-epoch validation losses.

    Computes mean/std/min/max of the epoch average losses plus the epoch
    with the lowest loss, mirrors them to wandb, prints a console summary,
    and returns the stats dict.

    Args:
        epoch_losses: List of dicts with 'epoch' and 'avg_loss' keys.

    Returns:
        dict: Display-name -> value mapping of the computed statistics.

    Raises:
        Exception: Re-raised after logging on any failure.
    """
    try:
        loss_values = [entry['avg_loss'] for entry in epoch_losses]

        best_index = np.argmin(loss_values)
        stats = {
            'Mean Loss': np.mean(loss_values),
            'Std Loss': np.std(loss_values),
            'Min Loss': np.min(loss_values),
            'Max Loss': np.max(loss_values),
            'Best Epoch': epoch_losses[best_index]['epoch'],
        }

        # Mirror the summary into wandb under the val/ namespace
        wandb.log({
            'val/mean_loss': stats['Mean Loss'],
            'val/std_loss': stats['Std Loss'],
            'val/min_loss': stats['Min Loss'],
            'val/max_loss': stats['Max Loss'],
            'val/best_epoch': stats['Best Epoch'],
        })

        # Human-readable console summary; the best epoch is an index, the
        # rest are floats shown to four decimal places.
        print("\nValidation Loss Statistics:")
        for metric_name, value in stats.items():
            rendered = f"{int(value)}" if metric_name == 'Best Epoch' else f"{value:.4f}"
            print(f"{metric_name}: {rendered}")

        return stats

    except Exception as e:
        logger.error(f"Error calculating statistics: {str(e)}")
        raise
|
334 |
+
|
335 |
+
def main():
    """Entry point: run the validation-loss analysis end to end.

    Initializes wandb, loads the model and combined val+test data, collects
    per-epoch validation losses, plots them, and prints summary statistics.
    GPU cache is cleared and the wandb run is closed even on failure.

    Raises:
        Exception: Re-raised after logging if any stage fails.
    """
    try:
        # Initialize wandb
        setup_wandb()

        # Load model and data
        logger.info("Loading model and data...")
        model, combined_loader, device, optimizer, scheduler, scaler, config = load_model_and_data()

        # Collect validation losses
        logger.info("Collecting validation losses...")
        epoch_losses = collect_validation_losses(
            model, combined_loader, device, optimizer, scheduler, scaler, config
        )

        # Plot losses
        logger.info("Plotting validation losses...")
        plot_validation_losses(epoch_losses)

        # Calculate and print statistics
        logger.info("Calculating statistics...")
        calculate_loss_statistics(epoch_losses)

    except Exception as e:
        logger.error(f"Error in main: {str(e)}")
        raise
    finally:
        # Clean up GPU memory and close the wandb run regardless of outcome
        torch.cuda.empty_cache()
        # Finish wandb run
        wandb.finish()
|
368 |
+
|
369 |
+
if __name__ == "__main__":
    # Top-level guard: record any fatal error before letting it propagate
    # so the failure reason lands in the log file as well as the traceback.
    try:
        main()
    except Exception as err:
        logger.error(f"Script failed: {str(err)}")
        raise
|
analysis/plot_roc_curves.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
from sklearn.metrics import roc_curve, auc
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
def plot_roc_curves(predictions_path, output_dir=None):
    """
    Plot ROC curves from model predictions.

    Generates three sets of plots under ``<output_dir>/plots``:
      1. roc_all_classes.png  - one curve per toxicity class
      2. roc_<class>.png      - per-class curve with a bootstrap confidence band
      3. roc_by_language.png  - per-language curves for the 'toxic' class

    Args:
        predictions_path (str): Path to the .npz file containing 'predictions',
            'labels' and 'langs' arrays.
        output_dir (str, optional): Directory to save plots. If None, will use
            same directory as predictions.
    """
    # Load predictions
    data = np.load(predictions_path)
    predictions = data['predictions']
    labels = data['labels']
    langs = data['langs']

    # Create output directory
    if output_dir is None:
        output_dir = os.path.dirname(predictions_path)
    plots_dir = os.path.join(output_dir, 'plots')
    os.makedirs(plots_dir, exist_ok=True)

    # Define toxicity types (column order must match the prediction matrix)
    toxicity_types = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Define language mapping (ids as stored in the 'langs' array)
    id_to_lang = {
        0: 'English (en)',
        1: 'Russian (ru)',
        2: 'Turkish (tr)',
        3: 'Spanish (es)',
        4: 'French (fr)',
        5: 'Italian (it)',
        6: 'Portuguese (pt)'
    }

    # Plot overall ROC curves (one per class)
    plt.figure(figsize=(10, 8))
    for i, class_name in enumerate(toxicity_types):
        fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f'{class_name} (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', label='Random')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves - All Classes')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, 'roc_all_classes.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # Plot per-class ROC curves with confidence intervals
    n_bootstrap = 1000

    for i, class_name in enumerate(toxicity_types):
        plt.figure(figsize=(8, 6))

        # Calculate main ROC curve on the full sample
        fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
        roc_auc = auc(fpr, tpr)

        # Plot main curve
        plt.plot(fpr, tpr, 'b-', label=f'ROC (AUC = {roc_auc:.3f})')

        # Bootstrap for confidence intervals
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0, 1, 100)

        for _ in range(n_bootstrap):
            # Bootstrap sample indices (with replacement)
            indices = np.random.randint(0, len(labels), len(labels))
            # A resample containing only one class has no defined ROC curve
            if len(np.unique(labels[indices, i])) < 2:
                continue

            # Calculate ROC curve on the resample
            fpr, tpr, _ = roc_curve(labels[indices, i], predictions[indices, i])

            # Interpolate TPR at a common FPR grid so resamples can be averaged
            interp_tpr = np.interp(mean_fpr, fpr, tpr)
            interp_tpr[0] = 0.0
            tprs.append(interp_tpr)
            aucs.append(auc(fpr, tpr))

        # Guard: for extremely rare classes every resample may be single-class,
        # leaving tprs empty; skip the confidence band instead of crashing on
        # np.mean of an empty array.
        if tprs:
            tprs = np.array(tprs)
            mean_tpr = np.mean(tprs, axis=0)
            std_tpr = np.std(tprs, axis=0)

            tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
            tprs_lower = np.maximum(mean_tpr - std_tpr, 0)

            # Plot confidence interval
            plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                             label='±1 std. dev.')

            # Report AUC mean ± std from the bootstrap distribution
            auc_mean = np.mean(aucs)
            auc_std = np.std(aucs)
            plt.plot([], [], ' ', label=f'AUC = {auc_mean:.3f} ± {auc_std:.3f}')

        plt.plot([0, 1], [0, 1], 'k--', label='Random')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {class_name}')
        plt.legend(loc='lower right')
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, f'roc_{class_name}.png'), dpi=300)
        plt.close()

    # Plot per-language ROC curves (for toxic class only, column 0)
    plt.figure(figsize=(10, 8))
    for lang_id, lang_name in id_to_lang.items():
        # Get samples for this language; skip languages with no samples or a
        # single label value (ROC undefined)
        lang_mask = langs == lang_id
        if lang_mask.sum() > 0 and len(np.unique(labels[lang_mask, 0])) > 1:
            fpr, tpr, _ = roc_curve(labels[lang_mask, 0], predictions[lang_mask, 0])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr, label=f'{lang_name} (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', label='Random')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves by Language - Toxic Class')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, 'roc_by_language.png'), dpi=300, bbox_inches='tight')
    plt.close()

    print(f"\nROC curves have been saved to {plots_dir}")
    print("\nGenerated plots:")
    print("1. roc_all_classes.png - ROC curves for all toxicity classes")
    print("2. roc_[class_name].png - Individual ROC curves with confidence intervals for each class")
    print("3. roc_by_language.png - ROC curves for each language (toxic class)")
|
146 |
+
|
147 |
+
if __name__ == '__main__':
    # Locate the most recent evaluation run and plot its ROC curves.
    eval_dir = 'evaluation_results'
    if not os.path.exists(eval_dir):
        print(f"Evaluation directory {eval_dir} not found")
    else:
        # Directory names embed a timestamp, so reverse lexicographic order
        # puts the newest run first.
        candidates = sorted(
            (entry for entry in os.listdir(eval_dir) if entry.startswith('eval_')),
            reverse=True,
        )
        if not candidates:
            print(f"No evaluation directories found in {eval_dir}")
        else:
            latest_eval = os.path.join(eval_dir, candidates[0])
            predictions_path = os.path.join(latest_eval, 'predictions.npz')
            if os.path.exists(predictions_path):
                plot_roc_curves(predictions_path)
            else:
                print(f"No predictions file found in {latest_eval}")
|
app.py
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
from model.inference_optimized import OptimizedToxicityClassifier
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
from typing import List, Dict
|
9 |
+
import langid
|
10 |
+
import pandas as pd
|
11 |
+
|
12 |
+
# Configure paths. Both can be overridden via environment variables so the
# Docker image and local runs can point at different weight locations.
ONNX_MODEL_PATH = os.environ.get("ONNX_MODEL_PATH", "weights/toxic_classifier.onnx")
PYTORCH_MODEL_PATH = os.environ.get("PYTORCH_MODEL_PATH", "weights/toxic_classifier_xlm-roberta-large/pytorch_model.bin")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Supported languages: ISO code -> display name shown in the UI dropdown
SUPPORTED_LANGUAGES = {
    'en': 'English',
    'ru': 'Russian',
    'tr': 'Turkish',
    'es': 'Spanish',
    'fr': 'French',
    'it': 'Italian',
    'pt': 'Portuguese'
}

# Initialize classifier at import time, preferring the ONNX export and
# falling back to the PyTorch weights. On any failure the app still starts;
# predict_toxicity() reports the error when classifier is None.
try:
    if os.path.exists(ONNX_MODEL_PATH):
        classifier = OptimizedToxicityClassifier(onnx_path=ONNX_MODEL_PATH, device=DEVICE)
        print(f"Loaded ONNX model from {ONNX_MODEL_PATH}")
    else:
        classifier = OptimizedToxicityClassifier(pytorch_path=PYTORCH_MODEL_PATH, device=DEVICE)
        print(f"Loaded PyTorch model from {PYTORCH_MODEL_PATH}")
except Exception as e:
    print(f"Error loading model: {str(e)}")
    classifier = None
|
39 |
+
|
40 |
+
def detect_language(text: str) -> str:
    """Detect the language of *text*, falling back to English.

    Args:
        text: Raw user input to classify.

    Returns:
        The ISO 639-1 code reported by langid if it is one of the supported
        languages, otherwise 'en'.
    """
    try:
        lang, _ = langid.classify(text)
        return lang if lang in SUPPORTED_LANGUAGES else 'en'
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt and
        # SystemExit; narrow to Exception so the 'en' fallback only covers
        # classifier errors.
        return 'en'
|
47 |
+
|
48 |
+
def predict_toxicity(text: str, selected_language: str = None) -> Dict:
    """Predict toxicity of input text and render the result for the UI.

    Args:
        text: Text to analyze; blank input produces an error payload.
        selected_language: Display name from the dropdown, or None /
            "Auto-detect" to let langid pick the language.

    Returns:
        dict: On success, keys 'prediction' (raw classifier output),
        'html_result' (rendered table) and 'fig' (matplotlib bar chart).
        On failure, keys 'error' and 'html_result'.

    NOTE(review): the Gradio click handlers wire this function to
    outputs=[result_html, plot_output]; returning a plain dict rather than an
    (html, fig) tuple looks inconsistent with that wiring — verify against
    the Gradio version in use.
    """
    if not text or not text.strip():
        return {
            "error": "Please enter some text to analyze.",
            "html_result": "<div class='error'>Please enter some text to analyze.</div>"
        }

    if classifier is None:
        # Model failed to load at import time (see module-level init)
        return {
            "error": "Model not loaded. Please check logs.",
            "html_result": "<div class='error'>Model not loaded. Please check logs.</div>"
        }

    # Detect language if not specified
    if not selected_language or selected_language == "Auto-detect":
        lang_code = detect_language(text)
        detected = True
    else:
        # Convert from display name to code; unknown names fall back to 'en'
        lang_code = next((code for code, name in SUPPORTED_LANGUAGES.items()
                         if name == selected_language), 'en')
        detected = False

    # Run prediction
    try:
        results = classifier.predict([text], langs=[lang_code])[0]

        # Format probabilities for display, highest probability first
        probs = results["probabilities"]
        sorted_categories = sorted(
            [(label, probs[label]) for label in probs],
            key=lambda x: x[1],
            reverse=True
        )

        # Create bar chart; red bars for categories at/above the 50% threshold
        fig, ax = plt.subplots(figsize=(10, 6))
        labels = [label.replace('_', ' ').title() for label, _ in sorted_categories]
        values = [prob * 100 for _, prob in sorted_categories]
        colors = ['#ff6b6b' if val >= 50 else '#74c0fc' for val in values]

        ax.barh(labels, values, color=colors)
        ax.set_xlim(0, 100)
        ax.set_xlabel('Probability (%)')
        ax.set_title('Toxicity Analysis')
        ax.grid(axis='x', linestyle='--', alpha=0.7)

        # Annotate values at the end of each bar
        for i, v in enumerate(values):
            ax.text(v + 1, i, f'{v:.1f}%', va='center')

        # Create HTML result header (overall verdict + language)
        lang_display = SUPPORTED_LANGUAGES.get(lang_code, lang_code)
        overall_result = "TOXIC" if results["is_toxic"] else "NON-TOXIC"
        result_color = "#ff6b6b" if results["is_toxic"] else "#66d9e8"

        html_result = f"""
        <div style='margin-bottom: 20px;'>
            <h2>Analysis Result: <span style='color: {result_color};'>{overall_result}</span></h2>
            <h3>Language: {lang_display} {'(detected)' if detected else ''}</h3>
        </div>
        <div style='margin-bottom: 10px;'>
            <table width='100%' style='border-collapse: collapse;'>
                <tr style='background-color: #e9ecef; font-weight: bold;'>
                    <th style='padding: 8px; text-align: left; border: 1px solid #dee2e6;'>Category</th>
                    <th style='padding: 8px; text-align: right; border: 1px solid #dee2e6;'>Probability</th>
                    <th style='padding: 8px; text-align: center; border: 1px solid #dee2e6;'>Status</th>
                </tr>
        """

        # Add rows for each toxicity category (same 0.5 threshold as the chart)
        for label, prob in sorted_categories:
            formatted_label = label.replace('_', ' ').title()
            status = "DETECTED" if prob >= 0.5 else "Not Detected"
            status_color = "#ff6b6b" if prob >= 0.5 else "#66d9e8"
            prob_percent = f"{prob * 100:.1f}%"

            html_result += f"""
                <tr>
                    <td style='padding: 8px; border: 1px solid #dee2e6;'>{formatted_label}</td>
                    <td style='padding: 8px; text-align: right; border: 1px solid #dee2e6;'>{prob_percent}</td>
                    <td style='padding: 8px; text-align: center; border: 1px solid #dee2e6; color: {status_color}; font-weight: bold;'>{status}</td>
                </tr>
            """

        html_result += "</table></div>"

        # Add detected categories if toxic
        if results["is_toxic"]:
            toxic_categories = [cat.replace('_', ' ').title() for cat in results["toxic_categories"]]
            categories_list = ", ".join(toxic_categories)
            html_result += f"""
            <div style='margin-top: 10px;'>
                <p><strong>Detected toxic categories:</strong> {categories_list}</p>
            </div>
            """

        return {
            "prediction": results,
            "html_result": html_result,
            "fig": fig
        }

    except Exception as e:
        # Surface the failure in the UI and keep the full traceback in logs
        import traceback
        traceback.print_exc()
        return {
            "error": f"Error processing text: {str(e)}",
            "html_result": f"<div class='error'>Error processing text: {str(e)}</div>"
        }
|
159 |
+
|
160 |
+
def create_app():
    """Create and configure the Gradio interface.

    Builds a Blocks layout with a text input, language selector, example
    buttons, and HTML/plot outputs wired to predict_toxicity().

    Returns:
        gr.Blocks: The assembled (not yet launched) Gradio app.
    """
    # Create language dropdown options; "Auto-detect" delegates to langid
    language_options = ["Auto-detect"] + list(SUPPORTED_LANGUAGES.values())

    # Define the interface (custom CSS for error boxes and typography)
    with gr.Blocks(css="""
        .error { color: #ff6b6b; font-weight: bold; padding: 10px; border: 1px solid #ff6b6b; }
        .container { margin: 0 auto; max-width: 900px; }
        .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
        .example-text { font-style: italic; color: #666; }
    """) as app:
        gr.Markdown("""
        # Multilingual Toxic Comment Classifier
        This app analyzes text for different types of toxicity across multiple languages.
        Enter your text, select a language (or let it auto-detect), and click 'Analyze'.

        Supported languages: English, Russian, Turkish, Spanish, French, Italian, Portuguese
        """)

        with gr.Row():
            # Left column: input controls
            with gr.Column(scale=3):
                text_input = gr.Textbox(
                    label="Enter text to analyze",
                    placeholder="Type or paste text here...",
                    lines=5
                )
                lang_dropdown = gr.Dropdown(
                    choices=language_options,
                    value="Auto-detect",
                    label="Language"
                )
                analyze_btn = gr.Button("Analyze", variant="primary")

            # Right column: one-click example texts
            with gr.Column(scale=2):
                gr.Markdown("### Example texts:")
                with gr.Accordion("English example"):
                    en_example_btn = gr.Button("Use English example")
                with gr.Accordion("Spanish example"):
                    es_example_btn = gr.Button("Use Spanish example")
                with gr.Accordion("French example"):
                    fr_example_btn = gr.Button("Use French example")

        # Examples (filled into the textbox by the buttons above)
        en_example_text = "You are such an idiot, nobody likes your stupid content."
        es_example_text = "Eres un completo idiota y nadie te quiere."
        fr_example_text = "Tu es tellement stupide, personne n'aime ton contenu minable."

        en_example_btn.click(
            lambda: en_example_text,
            outputs=text_input
        )
        es_example_btn.click(
            lambda: es_example_text,
            outputs=text_input
        )
        fr_example_btn.click(
            lambda: fr_example_text,
            outputs=text_input
        )

        # Output components
        result_html = gr.HTML(label="Analysis Result")
        plot_output = gr.Plot(label="Toxicity Probabilities")

        # Set up event handling.
        # NOTE(review): predict_toxicity returns a dict while outputs expects
        # two positional values (html, fig) — confirm this mapping works with
        # the installed Gradio version.
        analyze_btn.click(
            predict_toxicity,
            inputs=[text_input, lang_dropdown],
            outputs=[result_html, plot_output]
        )

        # Also analyze on pressing Enter in the text box
        text_input.submit(
            predict_toxicity,
            inputs=[text_input, lang_dropdown],
            outputs=[result_html, plot_output]
        )

        gr.Markdown("""
        ### About this model
        This model classifies text into six toxicity categories:
        - **Toxic**: General toxicity
        - **Severe Toxic**: Extreme toxicity
        - **Obscene**: Obscene content
        - **Threat**: Threatening content
        - **Insult**: Insulting content
        - **Identity Hate**: Identity-based hate

        Built using XLM-RoBERTa with language-aware fine-tuning.
        """)

    return app
|
253 |
+
|
254 |
+
# Launch the app when script is run directly
if __name__ == "__main__":
    # Build the Gradio interface and serve it
    demo = create_app()
    demo.launch(
        server_name="0.0.0.0",  # bind to all interfaces
        server_port=7860,       # default Gradio port
        share=True,             # generate a public share link
    )
|
augmentation/balance_english.py
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import torch

# Configure CPU and thread settings FIRST, before any other imports —
# these environment variables are only read at library initialization.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'
os.environ['TF_CPU_ENABLE_AVX2'] = '1'
os.environ['TF_CPU_ENABLE_AVX512F'] = '1'
os.environ['TF_CPU_ENABLE_AVX512_VNNI'] = '1'
os.environ['TF_CPU_ENABLE_FMA'] = '1'
# NOTE(review): 80 threads assumes a large many-core host — confirm for the
# target machine.
os.environ['MKL_NUM_THREADS'] = '80'
os.environ['OMP_NUM_THREADS'] = '80'

# Set PyTorch thread configurations once (set_num_interop_threads raises if
# called after parallel work has started)
torch.set_num_threads(80)
torch.set_num_interop_threads(10)

# Now import everything else
import pandas as pd
import numpy as np
from pathlib import Path
import logging
from datetime import datetime
import sys
from toxic_augment import ToxicAugmenter
import json

# Configure logging: mirror all messages to stdout and a timestamped file
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
log_file = log_dir / f"balance_english_{timestamp}.log"

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
|
44 |
+
|
45 |
+
def analyze_label_distribution(df, lang='en'):
    """Log and return the per-label breakdown for one language.

    Args:
        df: DataFrame with a 'lang' column and one binary column per label.
        lang: Language code to filter on (default 'en').

    Returns:
        dict: label -> {'count': int, 'percentage': float}; empty when the
        language has no rows.
    """
    subset = df[df['lang'] == lang]
    n_rows = len(subset)

    if n_rows == 0:
        logger.warning(f"No samples found for language {lang.upper()}.")
        return {}

    logger.info(f"\nLabel Distribution for {lang.upper()}:")
    logger.info("-" * 50)
    distribution = {}
    for label in ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'):
        positives = subset[label].sum()
        share = positives / n_rows * 100
        distribution[label] = {'count': int(positives), 'percentage': share}
        logger.info(f"{label}: {positives:,} ({share:.2f}%)")
    return distribution
|
64 |
+
|
65 |
+
def analyze_language_distribution(df):
    """Log how comments are distributed across languages.

    Args:
        df: DataFrame with a 'lang' column.

    Returns:
        pandas.Series: Comment counts per language, descending.
    """
    counts = df['lang'].value_counts()
    total = len(df)
    logger.info("\nCurrent Language Distribution:")
    logger.info("-" * 50)
    for language, n in counts.items():
        logger.info(f"{language}: {n:,} comments ({n/total*100:.2f}%)")
    return counts
|
73 |
+
|
74 |
+
def calculate_required_samples(df):
    """Return how many extra English rows are needed to match the largest language.

    Args:
        df: DataFrame with a 'lang' column.

    Returns:
        int: Additional English samples required to reach the count of the
        most-represented language (0 or negative if English already leads).
    """
    counts_by_lang = df['lang'].value_counts()
    # The most-represented language defines the per-language target
    target = counts_by_lang.max()
    english = counts_by_lang.get('en', 0)
    deficit = target - english

    logger.info(f"\nTarget count per language: {target:,}")
    logger.info(f"Current English count: {english:,}")
    logger.info(f"Required additional English samples: {deficit:,}")

    return deficit
|
86 |
+
|
87 |
+
def generate_balanced_samples(df, required_samples):
    """Generate samples maintaining original class distribution ratios.

    Args:
        df: Full multilingual dataset with 'lang', 'comment_text' and the six
            toxicity label columns.
        required_samples: Total number of new English rows wanted.

    Returns:
        DataFrame of generated rows tagged lang='en', trimmed to at most
        *required_samples* rows.

    Raises:
        Exception: if no label produced any valid samples.
    """
    logger.info("\nGenerating balanced samples...")

    # Get English samples
    en_df = df[df['lang'] == 'en']
    labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Calculate target counts for each label, proportional to each label's
    # share of the existing English rows. Labels overlap per row, so the
    # per-label targets need not sum exactly to required_samples.
    target_counts = {}
    for label in labels:
        count = en_df[label].sum()
        ratio = count / len(en_df)
        target_count = int(ratio * required_samples)
        target_counts[label] = target_count
        logger.info(f"Target count for {label}: {target_count:,}")

    augmented_samples = []
    # LLM-backed generator imported from this package (see toxic_augment.py)
    augmenter = ToxicAugmenter()
    total_generated = 0

    # Generate samples for each label
    for label, target_count in target_counts.items():
        if target_count == 0:
            continue

        logger.info(f"\nGenerating {target_count:,} samples for {label}")

        # Get seed texts with this label
        seed_texts = en_df[en_df[label] == 1]['comment_text'].tolist()

        if not seed_texts:
            logger.warning(f"No seed texts found for {label}, skipping...")
            continue

        # Generate samples with 5-minute timeout
        new_samples = augmenter.augment_dataset(
            target_samples=target_count,
            label=label, # Using single label instead of label_combo
            seed_texts=seed_texts,
            timeout_minutes=5
        )

        if new_samples is not None and not new_samples.empty:
            augmented_samples.append(new_samples)
            total_generated += len(new_samples)

            # Log progress
            logger.info(f"✓ Generated {len(new_samples):,} samples")
            logger.info(f"Progress: {total_generated:,}/{required_samples:,}")

            # Check if we have reached our global required samples
            if total_generated >= required_samples:
                logger.info("Reached required sample count, stopping generation")
                break

    # Combine all generated samples
    if augmented_samples:
        augmented_df = pd.concat(augmented_samples, ignore_index=True)
        augmented_df['lang'] = 'en'

        # Ensure we don't exceed the required sample count
        if len(augmented_df) > required_samples:
            logger.info(f"Trimming excess samples from {len(augmented_df):,} to {required_samples:,}")
            augmented_df = augmented_df.head(required_samples)

        # Log final class distribution
        logger.info("\nFinal class distribution in generated samples:")
        for label in labels:
            count = augmented_df[label].sum()
            percentage = (count / len(augmented_df)) * 100
            logger.info(f"{label}: {count:,} ({percentage:.2f}%)")

        # Also log clean samples (rows where every label column is 0)
        clean_count = len(augmented_df[augmented_df[labels].sum(axis=1) == 0])
        clean_percentage = (clean_count / len(augmented_df)) * 100
        logger.info(f"Clean samples: {clean_count:,} ({clean_percentage:.2f}%)")

        return augmented_df
    else:
        raise Exception("Failed to generate any valid samples")
|
168 |
+
|
169 |
+
def balance_english_data():
    """Main function to balance English data with other languages.

    Pipeline: load the multilingual CSV, measure current language/label
    distributions, generate enough synthetic English rows to match the
    largest language, merge, save the balanced CSV, and write a JSON stats
    file under logs/. Re-raises any failure after logging it.
    """
    try:
        # Load dataset
        input_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv'
        logger.info(f"Loading dataset from {input_file}")
        df = pd.read_csv(input_file)

        # Analyze current distribution
        logger.info("\nAnalyzing current distribution...")
        initial_dist = analyze_language_distribution(df)
        initial_label_dist = analyze_label_distribution(df, 'en')

        # Calculate required samples
        required_samples = calculate_required_samples(df)

        if required_samples <= 0:
            logger.info("English data is already balanced. No augmentation needed.")
            return

        # Generate balanced samples
        augmented_df = generate_balanced_samples(df, required_samples)

        # Merge with original dataset
        logger.info("\nMerging datasets...")
        output_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_BALANCED.csv'

        # Combine datasets
        combined_df = pd.concat([df, augmented_df], ignore_index=True)

        # Save balanced dataset
        combined_df.to_csv(output_file, index=False)
        logger.info(f"\nSaved balanced dataset to {output_file}")

        # Final distribution check
        logger.info("\nFinal distribution after balancing:")
        final_dist = analyze_language_distribution(combined_df)
        final_label_dist = analyze_label_distribution(combined_df, 'en')

        # Save distribution statistics.
        # NOTE(review): 'timestamp' is presumably the module-level run
        # timestamp set at import time — confirm it exists in this module.
        stats = {
            'timestamp': timestamp,
            'initial_distribution': {
                'languages': initial_dist.to_dict(),
                'english_labels': initial_label_dist
            },
            'final_distribution': {
                'languages': final_dist.to_dict(),
                'english_labels': final_label_dist
            },
            'samples_generated': len(augmented_df),
            'total_samples': len(combined_df)
        }

        stats_file = f'logs/balance_stats_{timestamp}.json'
        with open(stats_file, 'w') as f:
            json.dump(stats, f, indent=2)
        logger.info(f"\nSaved balancing statistics to {stats_file}")

    except Exception as e:
        logger.error(f"Error during balancing: {str(e)}")
        raise
|
231 |
+
|
232 |
+
def main():
    """Script entry point: delegate to the English-balancing pipeline."""
    balance_english_data()
|
234 |
+
|
235 |
+
if __name__ == "__main__":
    # Script entry: the module-level logger is already configured at import time.
    logger.info("Starting English data balancing process...")
    main()
|
augmentation/threat_augment.py
ADDED
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import (
|
3 |
+
AutoModelForCausalLM,
|
4 |
+
AutoTokenizer,
|
5 |
+
BitsAndBytesConfig
|
6 |
+
)
|
7 |
+
from langdetect import detect
|
8 |
+
import pandas as pd
|
9 |
+
import numpy as np
|
10 |
+
from tqdm import tqdm
|
11 |
+
from pathlib import Path
|
12 |
+
import logging
|
13 |
+
import gc
|
14 |
+
from typing import List
|
15 |
+
import json
|
16 |
+
from datetime import datetime, timedelta
|
17 |
+
import time
|
18 |
+
import sys
|
19 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
20 |
+
from sklearn.linear_model import LogisticRegression
|
21 |
+
import joblib
|
22 |
+
|
23 |
+
# Create log directories
|
24 |
+
# Create log directories
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

# Get timestamp for log file; also reused by ThreatAugmenter via `log_file`
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"generation_{timestamp}.log"

# Configure logging once at the start: mirror every record to stdout and to
# the per-run log file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
logger.info(f"Starting new run. Log file: {log_file}")
|
43 |
+
|
44 |
+
def log_separator(message: str = ""):
    """Emit a visual separator line to the log, optionally labelled with *message*."""
    if not message:
        logger.info("\n" + "=" * 100)
        return
    logger.info("\n" + "=" * 40 + f" {message} " + "=" * 40)
|
50 |
+
|
51 |
+
class FastThreatValidator:
    """Fast threat validation using logistic regression"""

    def __init__(self, model_path: str = "weights/threat_validator.joblib"):
        """Load the cached vectorizer+model from *model_path*, else train and save them."""
        self.model_path = model_path
        if Path(model_path).exists():
            logger.info("Loading fast threat validator...")
            model_data = joblib.load(model_path)
            self.vectorizer = model_data['vectorizer']
            self.model = model_data['model']
            logger.info("✓ Fast validator loaded")
        else:
            logger.info("Training fast threat validator...")
            self._train_validator()
            logger.info("✓ Fast validator trained and saved")

    def _train_validator(self):
        """Train a simple logistic regression model for threat detection"""
        # Load training data
        train_df = pd.read_csv("dataset/split/train.csv")

        # Prepare data
        X = train_df['comment_text'].fillna('')
        y = train_df['threat']

        # Create and fit vectorizer (word uni/bi-grams, top 10k features)
        self.vectorizer = TfidfVectorizer(
            max_features=10000,
            ngram_range=(1, 2),
            strip_accents='unicode',
            min_df=2
        )
        X_vec = self.vectorizer.fit_transform(X)

        # Train model; balanced class weights offset the rare positive class
        self.model = LogisticRegression(
            C=1.0,
            class_weight='balanced',
            max_iter=200,
            n_jobs=-1
        )
        self.model.fit(X_vec, y)

        # Save model
        # NOTE(review): assumes the "weights/" directory already exists — confirm.
        joblib.dump({
            'vectorizer': self.vectorizer,
            'model': self.model
        }, self.model_path)

    def validate(self, texts: List[str], threshold: float = 0.6) -> List[bool]:
        """Validate texts using the fast model"""
        # Vectorize texts
        X = self.vectorizer.transform(texts)

        # Get probabilities
        probs = self.model.predict_proba(X)[:, 1]

        # Return boolean mask.
        # NOTE(review): this is a numpy bool array, not a Python List[bool],
        # despite the annotation.
        return probs >= threshold
|
109 |
+
|
110 |
+
class ThreatAugmenter:
    """Generates synthetic English threat comments with Mistral-7B.

    Seeds each batch from existing English threat rows, post-filters the LLM
    output heuristically, and saves results under dataset/augmented/.
    """

    def __init__(self, seed_samples_path: str = "dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv"):
        log_separator("INITIALIZATION")

        # Use global log file
        self.log_file = log_file

        # Initialize generation buffer
        self.generation_buffer = []
        self.buffer_size = 100 # Flush buffer every 100 entries

        # Multi-GPU setup
        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 0:
            # TF32 speeds up fp32 matmuls on Ampere+ GPUs at slightly reduced precision
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            logger.info(f"Found {self.num_gpus} GPUs:")
            for i in range(self.num_gpus):
                mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f}GB)")

        # Load models
        log_separator("LOADING MODELS")
        logger.info("Loading Mistral-7B...")

        # Configure model for multi-GPU; 4-bit NF4 quantization keeps the 7B
        # model within the per-GPU memory limits set below.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        self.llm = AutoModelForCausalLM.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            device_map="balanced", # Ensures proper dual GPU usage
            torch_dtype=torch.float16,
            quantization_config=quantization_config,
            max_memory={0: "22GB", 1: "22GB"} # Explicitly set memory limits for each GPU
        )

        # Left padding so generated continuations align at the end of each
        # row in batched generate() calls.
        self.llm_tokenizer = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            padding_side="left",
            use_fast=True
        )
        self.llm_tokenizer.pad_token = self.llm_tokenizer.eos_token
        logger.info("✓ Mistral-7B loaded")

        # Initialize fast validator
        self.validator = FastThreatValidator()
        logger.info("✓ Fast validator initialized")

        # Load and preprocess data
        log_separator("LOADING DATA")
        logger.info(f"Loading samples from {seed_samples_path}")
        self.seed_df = pd.read_csv(seed_samples_path)
        self.en_threat_samples = self.seed_df[
            (self.seed_df['lang'] == 'en') &
            (self.seed_df['threat'] == 1)
        ]
        logger.info(f"Found {len(self.en_threat_samples)} EN threat samples")

        # Optimize batch processing
        self.max_batch_size = 48 # Increased batch size
        self.prefetch_factor = 4
        self.num_workers = 8

        # Memory cleanup
        torch.cuda.empty_cache()
        gc.collect()
        log_separator()

    def generate_prompt(self, seed_text: str) -> str:
        """Generate a prompt for realistic threatening content"""
        return f"""<s>[INST] Generate a threatening social media comment that is realistic and intimidating.

Reference example (generate something different): {seed_text}

Requirements:
- Make it genuinely threatening
- Use authentic social media language
- Keep it under 50 words
- Must be different from example

Generate ONLY the comment: [/INST]"""

    def flush_buffer(self):
        """Flush the generation buffer to disk"""
        if self.generation_buffer:
            try:
                # One JSON object per line (JSONL), appended to the run's log file
                with open(self.log_file, 'a', encoding='utf-8') as f:
                    for entry in self.generation_buffer:
                        f.write(json.dumps(entry, ensure_ascii=False) + '\n')
                self.generation_buffer = []
            except Exception as e:
                logger.error(f"Failed to flush buffer: {str(e)}")

    def log_generation(self, seed_text: str, prompt: str, generated_text: str, is_valid: bool):
        """Buffer log generation details"""
        log_entry = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "seed_text": seed_text,
            "prompt": prompt,
            "generated_text": generated_text,
            "is_valid": is_valid
        }

        self.generation_buffer.append(log_entry)

        # Flush buffer if it reaches the size limit
        if len(self.generation_buffer) >= self.buffer_size:
            self.flush_buffer()

    def generate_samples(self, prompts: List[str], seed_texts: List[str]) -> List[str]:
        """Run one batched LLM generation pass and return cleaned responses.

        Returns [] on any generation error. `seed_texts` is accepted for
        symmetry with callers but not used inside this method.
        """
        try:
            with torch.amp.autocast('cuda', dtype=torch.float16):
                inputs = self.llm_tokenizer(prompts, return_tensors="pt", padding=True,
                                            truncation=True, max_length=256).to(self.llm.device)

                outputs = self.llm.generate(
                    **inputs,
                    max_new_tokens=32,
                    temperature=0.95,
                    do_sample=True,
                    top_p=0.92,
                    top_k=50,
                    num_return_sequences=1,
                    repetition_penalty=1.15,
                    pad_token_id=self.llm_tokenizer.pad_token_id,
                    eos_token_id=self.llm_tokenizer.eos_token_id
                )

                # Keep special tokens so the [/INST] / </s> markers can be
                # used to slice out just the model's reply below.
                texts = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=False)
                cleaned_texts = []
                valid_count = 0

                # Process responses with minimal logging
                for idx, text in enumerate(texts):
                    if "[/INST]" in text and "</s>" in text:
                        response = text.split("[/INST]")[1].split("</s>")[0].strip()
                        response = response.strip().strip('"').strip("'")

                        word_count = len(response.split())
                        # Reject too-short/too-long replies and obvious prompt echoes
                        if (word_count >= 3 and word_count <= 50 and
                            not any(x in response.lower() for x in [
                                "generate", "requirements:", "reference",
                                "[inst]", "example"
                            ])):
                            cleaned_texts.append(response)
                            valid_count += 1

                # Log only summary statistics
                if valid_count > 0:
                    logger.info(f"\nBatch Success: {valid_count}/{len(texts)} ({valid_count/len(texts)*100:.1f}%)")

                return cleaned_texts

        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return []

    def validate_toxicity(self, texts: List[str]) -> torch.Tensor:
        """Validate texts using fast logistic regression.

        NOTE(review): defined but not called from augment_dataset below —
        generated samples are currently kept without validator filtering.
        """
        if not texts:
            return torch.zeros(0, dtype=torch.bool)

        # Get validation mask from fast validator
        validation_mask = self.validator.validate(texts)

        # Convert to torch tensor
        return torch.tensor(validation_mask, dtype=torch.bool, device=self.llm.device)

    def validate_language(self, texts: List[str]) -> List[bool]:
        """Simple language validation.

        NOTE(review): langdetect raises on empty/whitespace-only text —
        presumably callers only pass non-empty strings; confirm before reuse.
        """
        return [detect(text) == 'en' for text in texts]

    def augment_dataset(self, target_samples: int = 500, batch_size: int = 32):
        """Main augmentation loop with progress bar and CSV saving"""
        try:
            start_time = time.time()
            logger.info(f"Starting generation: target={target_samples}, batch_size={batch_size}")
            generated_samples = []
            stats = {
                "total_attempts": 0,
                "valid_samples": 0,
                "batch_times": []
            }

            # Create output directory if it doesn't exist
            output_dir = Path("dataset/augmented")
            output_dir.mkdir(parents=True, exist_ok=True)

            # Generate timestamp for the filename (shadows the module-level one)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = output_dir / f"threat_augmented_{timestamp}.csv"

            # Initialize progress bar
            pbar = tqdm(total=target_samples,
                        desc="Generating samples",
                        unit="samples",
                        ncols=100,
                        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]')

            while len(generated_samples) < target_samples:
                batch_start = time.time()

                # NOTE(review): Series.sample(batch_size) raises when fewer
                # than batch_size seed rows exist — confirm the seed set size.
                seed_texts = self.en_threat_samples['comment_text'].sample(batch_size).tolist()
                prompts = [self.generate_prompt(text) for text in seed_texts]
                new_samples = self.generate_samples(prompts, seed_texts)

                if not new_samples:
                    continue

                # Update statistics
                batch_time = time.time() - batch_start
                stats["batch_times"].append(batch_time)
                # NOTE(review): this adds only the *kept* samples, so the
                # "Success Rate" below always reads ~100%; batch_size (the
                # number attempted) was probably intended here.
                stats["total_attempts"] += len(new_samples)
                prev_len = len(generated_samples)
                generated_samples.extend(new_samples)
                stats["valid_samples"] = len(generated_samples)

                # Update progress bar
                pbar.update(len(generated_samples) - prev_len)

                # Calculate and display success rate periodically
                if len(stats["batch_times"]) % 10 == 0: # Every 10 batches
                    success_rate = (stats["valid_samples"] / stats["total_attempts"]) * 100
                    avg_batch_time = sum(stats["batch_times"][-20:]) / min(len(stats["batch_times"]), 20)
                    pbar.set_postfix({
                        'Success Rate': f'{success_rate:.1f}%',
                        'Batch Time': f'{avg_batch_time:.2f}s'
                    })

                # Cleanup
                if len(generated_samples) % (batch_size * 5) == 0:
                    torch.cuda.empty_cache()
                    gc.collect()

            # Close progress bar
            pbar.close()

            # Create DataFrame and save to CSV
            df = pd.DataFrame({
                'text': generated_samples[:target_samples],
                'label': 1, # These are all threat samples
                'source': 'augmented',
                'timestamp': timestamp
            })

            # Save to CSV
            df.to_csv(output_file, index=False)
            logger.info(f"\nSaved {len(df)} samples to {output_file}")

            # Final stats
            total_time = str(timedelta(seconds=int(time.time() - start_time)))
            logger.info(f"Generation complete: {len(generated_samples)} samples generated in {total_time}")

            return df

        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise
|
373 |
+
|
374 |
+
if __name__ == "__main__":
    # Free any leftover GPU memory before loading the 7B model
    torch.cuda.empty_cache()
    gc.collect()

    augmenter = ThreatAugmenter()
    augmented_df = augmenter.augment_dataset(target_samples=500)
|
augmentation/toxic_augment.py
ADDED
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import (
|
3 |
+
AutoModelForCausalLM,
|
4 |
+
AutoTokenizer,
|
5 |
+
BitsAndBytesConfig
|
6 |
+
)
|
7 |
+
import pandas as pd
|
8 |
+
import numpy as np
|
9 |
+
from tqdm import tqdm
|
10 |
+
from pathlib import Path
|
11 |
+
import logging
|
12 |
+
import gc
|
13 |
+
from typing import List, Dict
|
14 |
+
import json
|
15 |
+
from datetime import datetime
|
16 |
+
import time
|
17 |
+
import sys
|
18 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
19 |
+
from sklearn.linear_model import LogisticRegression
|
20 |
+
import joblib
|
21 |
+
import random
|
22 |
+
|
23 |
+
# Create log directories
|
24 |
+
# Create log directories
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

# Get timestamp for log file; the same path is reused by flush_buffer below
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"generation_{timestamp}.log"

# Configure logging: mirror every record to stdout and to the per-run log file
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
logger.info(f"Starting new run. Log file: {log_file}")
|
43 |
+
|
44 |
+
class FastToxicValidator:
    """Fast toxicity validation using logistic regression"""

    def __init__(self, model_path: str = "weights/toxic_validator.joblib"):
        """Load cached per-label models from *model_path*, else train and persist them."""
        self.model_path = model_path
        if Path(model_path).exists():
            logger.info("Loading fast toxic validator...")
            model_data = joblib.load(model_path)
            # One TF-IDF vectorizer and one classifier per toxicity label
            self.vectorizers = model_data['vectorizers']
            self.models = model_data['models']
            logger.info("✓ Fast validator loaded")
        else:
            logger.info("Training fast toxic validator...")
            self._train_validator()
            logger.info("✓ Fast validator trained and saved")

    def _train_validator(self):
        """Train logistic regression models for each toxicity type"""
        # Load training data
        train_df = pd.read_csv("dataset/split/train.csv")

        # Labels to validate
        labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

        self.vectorizers = {}
        self.models = {}

        # Train a model for each label
        for label in labels:
            # Create and fit vectorizer (word uni/bi-grams, top 10k features)
            vectorizer = TfidfVectorizer(
                max_features=10000,
                ngram_range=(1, 2),
                strip_accents='unicode',
                min_df=2
            )
            X = vectorizer.fit_transform(train_df['comment_text'].fillna(''))
            y = train_df[label]

            # Train model; balanced class weights offset the rare positive labels
            model = LogisticRegression(
                C=1.0,
                class_weight='balanced',
                max_iter=200,
                n_jobs=-1
            )
            model.fit(X, y)

            self.vectorizers[label] = vectorizer
            self.models[label] = model

        # Save models
        # NOTE(review): assumes the "weights/" directory already exists — confirm.
        joblib.dump({
            'vectorizers': self.vectorizers,
            'models': self.models
        }, self.model_path)

    def get_probabilities(self, texts: List[str], label: str) -> np.ndarray:
        """Get raw probabilities for a specific label"""
        X = self.vectorizers[label].transform(texts)
        return self.models[label].predict_proba(X)[:, 1]

    def validate(self, texts: List[str], label: str, threshold: float = 0.5) -> List[bool]:
        """Validate texts using the fast model with a lower threshold of 0.5"""
        # Vectorize texts
        X = self.vectorizers[label].transform(texts)

        # Get probabilities
        probs = self.models[label].predict_proba(X)[:, 1]

        # Return boolean mask with lower threshold.
        # NOTE(review): numpy bool array, not a Python List[bool], despite the annotation.
        return probs >= threshold
|
115 |
+
|
116 |
+
class ToxicAugmenter:
|
117 |
+
    def __init__(self):
        """Load Mistral-7B (quantized, dual-GPU) and the fast TF-IDF validator."""
        logger.info("Initializing ToxicAugmenter...")

        # Initialize generation buffer
        self.generation_buffer = []
        self.buffer_size = 100

        # Multi-GPU setup
        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 0:
            # TF32 trades a little precision for faster matmuls on Ampere+ GPUs
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            logger.info(f"Found {self.num_gpus} GPUs:")
            for i in range(self.num_gpus):
                mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f}GB)")

        # Load models with optimized settings
        logger.info("Loading Mistral-7B...")

        # Configure model for multi-GPU with optimized settings.
        # NOTE(review): unlike threat_augment.py, load_in_4bit=True is not set
        # here, so the bnb_4bit_* options may be inert — confirm this is intended.
        quantization_config = BitsAndBytesConfig(
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        # Initialize tokenizer first
        self.llm_tokenizer = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            padding_side="left",
            use_fast=True,
            model_max_length=512, # Limit context size for faster processing
            pad_token="</s>", # Set pad token explicitly
            add_eos_token=True,
            add_bos_token=True
        )

        # Set pad token id explicitly
        self.llm_tokenizer.pad_token = self.llm_tokenizer.eos_token
        self.llm_tokenizer.pad_token_id = self.llm_tokenizer.eos_token_id

        # Now initialize model with tokenizer's pad token
        self.llm = AutoModelForCausalLM.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            device_map="balanced",
            torch_dtype=torch.float16,
            quantization_config=quantization_config,
            max_memory={0: "22GB", 1: "22GB"},
            use_cache=True, # Enable KV cache for faster generation
            pad_token_id=self.llm_tokenizer.pad_token_id
        )

        logger.info("✓ Mistral-7B loaded")

        # Initialize validator
        self.validator = FastToxicValidator()
        logger.info("✓ Fast validator initialized")
|
175 |
+
|
176 |
+
def generate_prompt(self, seed_text: str, label: str) -> str:
|
177 |
+
"""Generate a prompt for a single toxicity type"""
|
178 |
+
base_prompt = """<s>[INST] You are tasked with generating a highly {type} comment. Your goal is to create content that would definitely be flagged as {type}.
|
179 |
+
|
180 |
+
Example for reference (generate something different): "{seed_text}"
|
181 |
+
|
182 |
+
Requirements:
|
183 |
+
- Must be extremely {type} in nature
|
184 |
+
- Use very strong language
|
185 |
+
- Must be different from the example
|
186 |
+
|
187 |
+
Generate ONLY the {type} comment (no explanations): [/INST]"""
|
188 |
+
|
189 |
+
type_mapping = {
|
190 |
+
'toxic': 'toxic and offensive',
|
191 |
+
'severe_toxic': 'toxic and hostile',
|
192 |
+
'obscene': 'obscene and vulgar',
|
193 |
+
'threat': 'threatening and intimidating',
|
194 |
+
'insult': 'insulting and demeaning',
|
195 |
+
'identity_hate': 'discriminatory and hateful'
|
196 |
+
}
|
197 |
+
|
198 |
+
return base_prompt.format(type=type_mapping[label], seed_text=seed_text)
|
199 |
+
|
200 |
+
def flush_buffer(self):
|
201 |
+
"""Flush the generation buffer to disk"""
|
202 |
+
if self.generation_buffer:
|
203 |
+
try:
|
204 |
+
with open(log_file, 'a', encoding='utf-8') as f:
|
205 |
+
for entry in self.generation_buffer:
|
206 |
+
f.write(json.dumps(entry, ensure_ascii=False) + '\n')
|
207 |
+
self.generation_buffer = []
|
208 |
+
except Exception as e:
|
209 |
+
logger.error(f"Failed to flush buffer: {str(e)}")
|
210 |
+
|
211 |
+
def log_generation(self, seed_text: str, prompt: str, generated_text: str, validation_results: Dict[str, bool]):
    """Queue one generation record for JSONL logging, flushing when full.

    Args:
        seed_text: Seed comment that produced the prompt.
        prompt: Full prompt sent to the LLM.
        generated_text: Text the LLM produced.
        validation_results: Per-check outcomes; values may be numpy/torch
            bools, which are coerced to plain ``bool`` for JSON.
    """
    # numpy/torch booleans are not JSON-serializable; force native bools.
    coerced = {key: bool(flag) for key, flag in validation_results.items()}

    self.generation_buffer.append({
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "seed_text": seed_text,
        "prompt": prompt,
        "generated_text": generated_text,
        "validation_results": coerced,
    })

    # Persist once the in-memory buffer reaches its configured size.
    if len(self.generation_buffer) >= self.buffer_size:
        self.flush_buffer()
|
231 |
+
|
232 |
+
def validate_sample(self, text: str, label: str, attempts: int) -> bool:
    """Return True when the classifier score for `label` clears its threshold.

    Args:
        text: Candidate generated comment.
        label: Toxicity label the text is meant to exhibit.
        attempts: Current attempt counter; every 5th attempt is logged even
            when the sample is rejected.

    Returns:
        True if the validator probability meets the per-label cut-off;
        False on rejection or on any validation error.
    """
    try:
        score = self.validator.get_probabilities([text], label)[0]

        # Deliberately lenient per-label cut-offs so early runs yield samples.
        cutoffs = {
            'toxic': 0.3,  # Very lenient to get started
            'severe_toxic': 0.2,
            'obscene': 0.2,
            'threat': 0.2,
            'insult': 0.2,
            'identity_hate': 0.2,
        }
        cutoff = cutoffs.get(label, 0.2)
        accepted = score >= cutoff

        # Log every acceptance, plus every 5th attempt for visibility.
        if accepted or (attempts % 5 == 0):
            logger.info(f"\nValidation - Label: {label}, Text: {text}")
            logger.info(f"Probability: {score:.3f}, Threshold: {cutoff:.2f}, Passed: {accepted}")

        return accepted

    except Exception as e:
        logger.error(f"Validation error: {str(e)}")
        return False
|
260 |
+
|
261 |
+
def generate_samples(self, target_samples: int, label: str,
                    seed_texts: List[str], total_timeout: int = 300) -> pd.DataFrame:
    """Generate up to `target_samples` validated comments for one toxicity label.

    Repeatedly prompts the LLM with a randomly chosen seed text, extracts the
    reply after the "[/INST]" marker, and keeps it only when
    `validate_sample` accepts it. The loop stops when the target is reached,
    `max_attempts` is exhausted, or `total_timeout` seconds have elapsed.

    Args:
        target_samples: Number of accepted samples to aim for.
        label: Toxicity label; accepted rows get this column set to 1 and the
            other five label columns set to 0.
        seed_texts: Pool of example comments used to seed the prompts.
        total_timeout: Wall-clock budget in seconds (default 300).

    Returns:
        A DataFrame of accepted samples (possibly fewer than requested), or
        None when nothing was accepted before stopping.
    """
    start_time = time.time()
    generated_samples = []
    attempts = 0
    max_attempts = target_samples * 50  # Much more attempts allowed
    # NOTE(review): batch_size is computed but never used below — generation
    # runs one prompt per attempt.
    batch_size = min(16, target_samples)  # Smaller batch size for better control

    pbar = tqdm(total=target_samples, desc=f"Generating {label} samples")

    try:
        while len(generated_samples) < target_samples and attempts < max_attempts:
            # Stop when the wall-clock budget is exhausted.
            if time.time() - start_time > total_timeout:
                logger.warning(f"Generation timed out after {total_timeout} seconds")
                break

            attempts += 1

            # Select random seed text and generate prompt
            seed_text = random.choice(seed_texts)
            prompt = self.generate_prompt(seed_text, label)

            try:
                # Tokenize and move to the model's device; inputs are capped
                # at 512 tokens.
                inputs = self.llm_tokenizer(prompt, return_tensors="pt", padding=True,
                                          truncation=True, max_length=512).to(self.llm.device)

                with torch.no_grad():
                    # Beam-sampling decode tuned for diversity over fluency.
                    outputs = self.llm.generate(
                        **inputs,
                        max_new_tokens=200,  # Doubled for longer content
                        num_beams=4,  # Added beam search
                        temperature=1.35,  # Higher temperature for more randomness
                        do_sample=True,
                        top_p=0.99,  # Almost no filtering
                        top_k=200,  # More options
                        num_return_sequences=1,
                        repetition_penalty=1.0,  # No repetition penalty
                        no_repeat_ngram_size=0,  # No ngram blocking
                        early_stopping=True,  # Stop when complete
                        pad_token_id=self.llm_tokenizer.pad_token_id,
                        bos_token_id=self.llm_tokenizer.bos_token_id,
                        eos_token_id=self.llm_tokenizer.eos_token_id,
                        use_cache=True
                    )

                text = self.llm_tokenizer.decode(outputs[0], skip_special_tokens=True)

                # The decoded text contains the prompt; keep only the reply
                # after the [/INST] marker and strip wrapping quotes.
                if "[/INST]" in text:
                    output = text.split("[/INST]")[1].strip()
                    output = output.strip().strip('"').strip("'")

                    # Only check minimum length
                    if len(output) >= 10:
                        # Log every 5th attempt for visibility.
                        if attempts % 5 == 0:  # More frequent logging
                            logger.info(f"\nAttempt {attempts}: Generated text: {output}")

                        # Keep only samples the fast validator accepts; build
                        # a one-hot label row for the accepted text.
                        if self.validate_sample(output, label, attempts):
                            sample_dict = {'comment_text': output}
                            for l in ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']:
                                sample_dict[l] = 1 if l == label else 0
                            generated_samples.append(sample_dict)
                            pbar.update(1)
                            logger.info(f"✓ Valid {label} sample generated ({len(generated_samples)}/{target_samples})")

            except Exception as e:
                # A single failed attempt should not abort the whole run.
                logger.error(f"Generation error on attempt {attempts}: {str(e)}")
                continue

            # Clear GPU cache periodically rather than every attempt.
            if attempts % 200 == 0:
                torch.cuda.empty_cache()
                gc.collect()

    finally:
        pbar.close()
        logger.info(f"Generation finished: {len(generated_samples)}/{target_samples} samples in {attempts} attempts")

    # Return partial results rather than failing when the target was missed.
    if generated_samples:
        return pd.DataFrame(generated_samples)
    return None
|
348 |
+
|
349 |
+
def augment_dataset(self, target_samples: int, label: str, seed_texts: List[str], timeout_minutes: int = 5) -> pd.DataFrame:
    """Generate `target_samples` synthetic comments carrying a single label.

    Drives `generate_samples` in batches under a hard wall-clock budget,
    tracks progress/throughput in a tqdm bar, and concatenates the batches
    into one DataFrame.

    Args:
        target_samples: Total number of samples to produce.
        label: Toxicity label the generated rows should carry.
        seed_texts: Pool of example comments used to seed prompts.
        timeout_minutes: Requested budget; clamped to at most 5 minutes.

    Returns:
        A DataFrame with at most `target_samples` rows, or None when nothing
        was generated or an unexpected error occurred.
    """
    logger.info(f"\nGenerating {target_samples} samples with label: {label}")

    generated_samples = []
    batch_size = min(32, target_samples)
    start_time = time.time()
    timeout_seconds = min(timeout_minutes * 60, 300)  # Hard limit of 5 minutes
    total_generated = 0
    pbar = None

    try:
        # Create progress bar
        pbar = tqdm(
            total=target_samples,
            desc="Generating",
            unit="samples",
            ncols=100,
            bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]'
        )

        while total_generated < target_samples:
            # Stop when the wall-clock budget is exhausted.
            elapsed_time = time.time() - start_time
            if elapsed_time > timeout_seconds:
                logger.warning(f"Time limit reached after {elapsed_time/60:.1f} minutes")
                break

            # Calculate remaining samples needed
            remaining = target_samples - total_generated
            current_batch_size = min(batch_size, remaining)

            # Select batch of seed texts.
            # Fix: the old code also pre-built `prompts` here, but the list
            # was never used — generate_samples builds its own prompts from
            # the seed texts, so that work was duplicated and is removed.
            batch_seeds = np.random.choice(seed_texts, size=current_batch_size)

            # Generate and validate samples, passing down the time remaining.
            batch_start = time.time()
            new_samples = self.generate_samples(
                target_samples=current_batch_size,
                label=label,
                seed_texts=batch_seeds,
                total_timeout=timeout_seconds - elapsed_time
            )

            if new_samples is not None and not new_samples.empty:
                # Never exceed the requested total.
                if len(new_samples) > remaining:
                    new_samples = new_samples.head(remaining)

                generated_samples.append(new_samples)
                num_new = len(new_samples)
                total_generated += num_new

                # Update progress bar
                pbar.update(num_new)

                # Calculate and display throughput metrics.
                elapsed_minutes = elapsed_time / 60
                rate = total_generated / elapsed_minutes if elapsed_minutes > 0 else 0
                batch_time = time.time() - batch_start
                time_remaining = max(0, timeout_seconds - elapsed_time)

                pbar.set_postfix({
                    'rate': f'{rate:.1f}/min',
                    'batch': f'{batch_time:.1f}s',
                    'remain': f'{time_remaining:.0f}s'
                }, refresh=True)

            # Free GPU memory every few batches.
            if total_generated % (batch_size * 4) == 0:
                torch.cuda.empty_cache()

        # Combine all generated batches into a single frame.
        if generated_samples:
            final_df = pd.concat(generated_samples, ignore_index=True)
            if len(final_df) > target_samples:
                final_df = final_df.head(target_samples)
            logger.info(f"Successfully generated {len(final_df)} samples in {elapsed_time/60:.1f} minutes")
            return final_df

        return None

    except Exception as e:
        logger.error(f"Generation error: {str(e)}")
        return None
    finally:
        if pbar is not None:
            pbar.close()
        # Final cleanup: persist any buffered logs and release GPU memory.
        self.flush_buffer()
        torch.cuda.empty_cache()
|
datacard.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Jigsaw Toxic Comment Classification Dataset
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
Version: 1.0
|
5 |
+
Date Created: 2025-02-03
|
6 |
+
|
7 |
+
### Description
|
8 |
+
|
9 |
+
The Jigsaw Toxic Comment Classification Dataset is designed to help identify and classify toxic online comments.
|
10 |
+
It contains text comments with multiple toxicity-related labels including general toxicity, severe toxicity,
|
11 |
+
obscenity, threats, insults, and identity-based hate speech.
|
12 |
+
|
13 |
+
The dataset includes:
|
14 |
+
1. Main training data with binary toxicity labels
|
15 |
+
2. Unintended bias training data with additional identity attributes
|
16 |
+
3. Processed versions with sequence length 128 for direct model input
|
17 |
+
4. Test and validation sets for model evaluation
|
18 |
+
|
19 |
+
This dataset was created by Jigsaw and Google's Conversation AI team to help improve online conversation quality
|
20 |
+
by identifying and classifying various forms of toxic comments.
|
21 |
+
|
22 |
+
|
23 |
+
## Column Descriptions
|
24 |
+
|
25 |
+
- **id**: Unique identifier for each comment
|
26 |
+
- **comment_text**: The text content of the comment to be classified
|
27 |
+
- **toxic**: Binary label indicating if the comment is toxic
|
28 |
+
- **severe_toxic**: Binary label for extremely toxic comments
|
29 |
+
- **obscene**: Binary label for obscene content
|
30 |
+
- **threat**: Binary label for threatening content
|
31 |
+
- **insult**: Binary label for insulting content
|
32 |
+
- **identity_hate**: Binary label for identity-based hate speech
|
33 |
+
- **target**: Overall toxicity score (in bias dataset)
|
34 |
+
- **identity_attack**: Binary label for identity-based attacks
|
35 |
+
- **identity_\***: Various identity-related attributes in the bias dataset
|
36 |
+
- **lang**: Language of the comment
|
37 |
+
|
38 |
+
## Files
|
39 |
+
|
docker-compose.yml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Compose definition for running the toxic-comment classifier with GPU access.
version: '3.8'

services:
  toxic-classifier:
    build: .
    runtime: nvidia # Enable NVIDIA runtime for GPU support
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - WANDB_API_KEY=${WANDB_API_KEY} # Set this in .env file
    volumes:
      - ./dataset:/app/dataset # Mount dataset directory
      - ./weights:/app/weights # Mount weights directory
    command: python model/train.py # Default command, can be overridden
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_identity_hate.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_insult.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_obscene.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_severe_toxic.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_threat.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_0.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_1.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_2.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_3.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_4.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_5.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_6.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/eval_params.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"timestamp": "20250208_161149",
|
3 |
+
"model_path": "weights/toxic_classifier_xlm-roberta-large",
|
4 |
+
"test_file": "dataset/split/test.csv",
|
5 |
+
"batch_size": 32,
|
6 |
+
"num_workers": null
|
7 |
+
}
|
evaluation_results/eval_20250208_161149/evaluation_results.json
ADDED
@@ -0,0 +1,2020 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"overall": {
|
3 |
+
"loss": 0.18776385083473274,
|
4 |
+
"auc_macro": 0.9259171799699759,
|
5 |
+
"auc_weighted": 0.9442696333538418,
|
6 |
+
"precision_macro": 0.4388604553772207,
|
7 |
+
"precision_weighted": 0.7008073672218381,
|
8 |
+
"recall_macro": 0.8836014181101747,
|
9 |
+
"recall_weighted": 0.9051010634378761,
|
10 |
+
"f1_macro": 0.530782857064369,
|
11 |
+
"f1_weighted": 0.7669279374035199,
|
12 |
+
"class_support": {
|
13 |
+
"toxic": 17646,
|
14 |
+
"severe_toxic": 1649,
|
15 |
+
"obscene": 8625,
|
16 |
+
"threat": 714,
|
17 |
+
"insult": 10201,
|
18 |
+
"identity_hate": 1882
|
19 |
+
},
|
20 |
+
"per_class_metrics": {
|
21 |
+
"toxic": {
|
22 |
+
"precision": 0.9115322083309974,
|
23 |
+
"recall": 0.9213986172503683,
|
24 |
+
"f1": 0.9164388580446975,
|
25 |
+
"support": 17646,
|
26 |
+
"specificity": 0.9121478677207437
|
27 |
+
},
|
28 |
+
"severe_toxic": {
|
29 |
+
"precision": 0.15755900489049543,
|
30 |
+
"recall": 0.8987265009096422,
|
31 |
+
"f1": 0.26811397557666217,
|
32 |
+
"support": 1649,
|
33 |
+
"specificity": 0.7666597956359139
|
34 |
+
},
|
35 |
+
"obscene": {
|
36 |
+
"precision": 0.6238325281803543,
|
37 |
+
"recall": 0.8983188405797101,
|
38 |
+
"f1": 0.7363269185079592,
|
39 |
+
"support": 8625,
|
40 |
+
"specificity": 0.8268539450765297
|
41 |
+
},
|
42 |
+
"threat": {
|
43 |
+
"precision": 0.10505486598309048,
|
44 |
+
"recall": 0.8179271708683473,
|
45 |
+
"f1": 0.18619480312450185,
|
46 |
+
"support": 714,
|
47 |
+
"specificity": 0.8574253453315757
|
48 |
+
},
|
49 |
+
"insult": {
|
50 |
+
"precision": 0.6205890336590663,
|
51 |
+
"recall": 0.8964807371826291,
|
52 |
+
"f1": 0.7334482896900189,
|
53 |
+
"support": 10201,
|
54 |
+
"specificity": 0.7799425355217067
|
55 |
+
},
|
56 |
+
"identity_hate": {
|
57 |
+
"precision": 0.21459509121932013,
|
58 |
+
"recall": 0.8687566418703507,
|
59 |
+
"f1": 0.3441742974423745,
|
60 |
+
"support": 1882,
|
61 |
+
"specificity": 0.822570123939987
|
62 |
+
}
|
63 |
+
},
|
64 |
+
"class_weights": {
|
65 |
+
"toxic": 0.43338163420684234,
|
66 |
+
"severe_toxic": 0.04049905444900165,
|
67 |
+
"obscene": 0.21182798339759806,
|
68 |
+
"threat": 0.017535673060392463,
|
69 |
+
"insult": 0.2505341749146548,
|
70 |
+
"identity_hate": 0.04622147997151067
|
71 |
+
},
|
72 |
+
"hamming_loss": 0.1618924586235303,
|
73 |
+
"exact_match": 0.499747247809481,
|
74 |
+
"specificity_macro": 0.8275999355377427,
|
75 |
+
"specificity_weighted": 0.8275999355377428,
|
76 |
+
"summary": {
|
77 |
+
"auc": {
|
78 |
+
"macro": 0.9259171799699759,
|
79 |
+
"weighted": 0.9442696333538418
|
80 |
+
},
|
81 |
+
"f1": {
|
82 |
+
"macro": 0.530782857064369,
|
83 |
+
"weighted": 0.7669279374035199
|
84 |
+
},
|
85 |
+
"precision": {
|
86 |
+
"macro": 0.4388604553772207,
|
87 |
+
"weighted": 0.7008073672218381
|
88 |
+
},
|
89 |
+
"recall": {
|
90 |
+
"macro": 0.8836014181101747,
|
91 |
+
"weighted": 0.9051010634378761
|
92 |
+
},
|
93 |
+
"specificity": {
|
94 |
+
"macro": 0.8275999355377427,
|
95 |
+
"weighted": 0.8275999355377428
|
96 |
+
},
|
97 |
+
"other_metrics": {
|
98 |
+
"hamming_loss": 0.1618924586235303,
|
99 |
+
"exact_match": 0.499747247809481
|
100 |
+
},
|
101 |
+
"class_support": {
|
102 |
+
"toxic": 17646,
|
103 |
+
"severe_toxic": 1649,
|
104 |
+
"obscene": 8625,
|
105 |
+
"threat": 714,
|
106 |
+
"insult": 10201,
|
107 |
+
"identity_hate": 1882
|
108 |
+
}
|
109 |
+
}
|
110 |
+
},
|
111 |
+
"per_language": {
|
112 |
+
"0": {
|
113 |
+
"auc": 0.9546775894690953,
|
114 |
+
"precision": 0.714413481020392,
|
115 |
+
"recall": 0.9246670642019479,
|
116 |
+
"f1": 0.7877150106257862,
|
117 |
+
"hamming_loss": 0.12826939843068874,
|
118 |
+
"exact_match": 0.5564516129032258,
|
119 |
+
"specificity": 0.8596476657420098,
|
120 |
+
"class_metrics": {
|
121 |
+
"toxic": {
|
122 |
+
"auc": 0.9621138334064959,
|
123 |
+
"threshold": 0.46047261357307434,
|
124 |
+
"precision": 0.8825137733163603,
|
125 |
+
"recall": 0.9342830882352909,
|
126 |
+
"f1": 0.9076608519017388,
|
127 |
+
"specificity": 0.8756218905472631,
|
128 |
+
"npv": 0.9301878222768437,
|
129 |
+
"positive_samples": 2176,
|
130 |
+
"true_positives": 2143,
|
131 |
+
"false_positives": 285,
|
132 |
+
"true_negatives": 2008,
|
133 |
+
"false_negatives": 150,
|
134 |
+
"auc_ci": [
|
135 |
+
0.9621138334064959,
|
136 |
+
0.9621138334064959
|
137 |
+
],
|
138 |
+
"precision_ci": [
|
139 |
+
0.8825137733163603,
|
140 |
+
0.8825137733163603
|
141 |
+
],
|
142 |
+
"recall_ci": [
|
143 |
+
0.9342830882352909,
|
144 |
+
0.9342830882352909
|
145 |
+
],
|
146 |
+
"f1_ci": [
|
147 |
+
0.9076608519017388,
|
148 |
+
0.9076608519017388
|
149 |
+
],
|
150 |
+
"specificity_ci": [
|
151 |
+
0.8756218905472631,
|
152 |
+
0.8756218905472631
|
153 |
+
],
|
154 |
+
"npv_ci": [
|
155 |
+
0.9301878222768437,
|
156 |
+
0.9301878222768437
|
157 |
+
],
|
158 |
+
"class_weights": {
|
159 |
+
"0.0": 0.951077943615257,
|
160 |
+
"1.0": 1.0542279411764706
|
161 |
+
}
|
162 |
+
},
|
163 |
+
"severe_toxic": {
|
164 |
+
"auc": 0.9499761279127715,
|
165 |
+
"threshold": 0.03537772223353386,
|
166 |
+
"precision": 0.8608043862269837,
|
167 |
+
"recall": 0.9492385786802037,
|
168 |
+
"f1": 0.9028611452277716,
|
169 |
+
"specificity": 0.8465042131632855,
|
170 |
+
"npv": 0.9434265401805545,
|
171 |
+
"positive_samples": 197,
|
172 |
+
"true_positives": 2177,
|
173 |
+
"false_positives": 352,
|
174 |
+
"true_negatives": 1941,
|
175 |
+
"false_negatives": 116,
|
176 |
+
"auc_ci": [
|
177 |
+
0.9499761279127715,
|
178 |
+
0.9499761279127715
|
179 |
+
],
|
180 |
+
"precision_ci": [
|
181 |
+
0.8608043862269837,
|
182 |
+
0.8608043862269837
|
183 |
+
],
|
184 |
+
"recall_ci": [
|
185 |
+
0.9492385786802037,
|
186 |
+
0.9492385786802037
|
187 |
+
],
|
188 |
+
"f1_ci": [
|
189 |
+
0.9028611452277716,
|
190 |
+
0.9028611452277716
|
191 |
+
],
|
192 |
+
"specificity_ci": [
|
193 |
+
0.8465042131632855,
|
194 |
+
0.8465042131632855
|
195 |
+
],
|
196 |
+
"npv_ci": [
|
197 |
+
0.9434265401805545,
|
198 |
+
0.9434265401805545
|
199 |
+
],
|
200 |
+
"class_weights": {
|
201 |
+
"0.0": 0.5224322477795491,
|
202 |
+
"1.0": 11.644670050761421
|
203 |
+
}
|
204 |
+
},
|
205 |
+
"obscene": {
|
206 |
+
"auc": 0.9572805958351019,
|
207 |
+
"threshold": 0.2777131497859955,
|
208 |
+
"precision": 0.8724828332798461,
|
209 |
+
"recall": 0.9115977291159771,
|
210 |
+
"f1": 0.8916114958872817,
|
211 |
+
"specificity": 0.8667660208643849,
|
212 |
+
"npv": 0.9074484866722257,
|
213 |
+
"positive_samples": 1233,
|
214 |
+
"true_positives": 2091,
|
215 |
+
"false_positives": 305,
|
216 |
+
"true_negatives": 1988,
|
217 |
+
"false_negatives": 202,
|
218 |
+
"auc_ci": [
|
219 |
+
0.9572805958351019,
|
220 |
+
0.9572805958351019
|
221 |
+
],
|
222 |
+
"precision_ci": [
|
223 |
+
0.8724828332798461,
|
224 |
+
0.8724828332798461
|
225 |
+
],
|
226 |
+
"recall_ci": [
|
227 |
+
0.9115977291159771,
|
228 |
+
0.9115977291159771
|
229 |
+
],
|
230 |
+
"f1_ci": [
|
231 |
+
0.8916114958872817,
|
232 |
+
0.8916114958872817
|
233 |
+
],
|
234 |
+
"specificity_ci": [
|
235 |
+
0.8667660208643849,
|
236 |
+
0.8667660208643849
|
237 |
+
],
|
238 |
+
"npv_ci": [
|
239 |
+
0.9074484866722257,
|
240 |
+
0.9074484866722257
|
241 |
+
],
|
242 |
+
"class_weights": {
|
243 |
+
"0.0": 0.6837555886736214,
|
244 |
+
"1.0": 1.8605028386050284
|
245 |
+
}
|
246 |
+
},
|
247 |
+
"threat": {
|
248 |
+
"auc": 0.9697358146798531,
|
249 |
+
"threshold": 0.016539234668016434,
|
250 |
+
"precision": 0.9045252081854022,
|
251 |
+
"recall": 0.9117647058823535,
|
252 |
+
"f1": 0.9081305291811165,
|
253 |
+
"specificity": 0.9037610619468958,
|
254 |
+
"npv": 0.9110528041980915,
|
255 |
+
"positive_samples": 68,
|
256 |
+
"true_positives": 2091,
|
257 |
+
"false_positives": 220,
|
258 |
+
"true_negatives": 2073,
|
259 |
+
"false_negatives": 202,
|
260 |
+
"auc_ci": [
|
261 |
+
0.9697358146798531,
|
262 |
+
0.9697358146798531
|
263 |
+
],
|
264 |
+
"precision_ci": [
|
265 |
+
0.9045252081854022,
|
266 |
+
0.9045252081854022
|
267 |
+
],
|
268 |
+
"recall_ci": [
|
269 |
+
0.9117647058823535,
|
270 |
+
0.9117647058823535
|
271 |
+
],
|
272 |
+
"f1_ci": [
|
273 |
+
0.9081305291811165,
|
274 |
+
0.9081305291811165
|
275 |
+
],
|
276 |
+
"specificity_ci": [
|
277 |
+
0.9037610619468958,
|
278 |
+
0.9037610619468958
|
279 |
+
],
|
280 |
+
"npv_ci": [
|
281 |
+
0.9110528041980915,
|
282 |
+
0.9110528041980915
|
283 |
+
],
|
284 |
+
"class_weights": {
|
285 |
+
"0.0": 0.5075221238938054,
|
286 |
+
"1.0": 33.73529411764706
|
287 |
+
}
|
288 |
+
},
|
289 |
+
"insult": {
|
290 |
+
"auc": 0.935014291573492,
|
291 |
+
"threshold": 0.25907590985298157,
|
292 |
+
"precision": 0.833978890287596,
|
293 |
+
"recall": 0.9098862642169729,
|
294 |
+
"f1": 0.8702805202104968,
|
295 |
+
"specificity": 0.8188679245282912,
|
296 |
+
"npv": 0.900862976980011,
|
297 |
+
"positive_samples": 1143,
|
298 |
+
"true_positives": 2087,
|
299 |
+
"false_positives": 415,
|
300 |
+
"true_negatives": 1878,
|
301 |
+
"false_negatives": 206,
|
302 |
+
"auc_ci": [
|
303 |
+
0.935014291573492,
|
304 |
+
0.935014291573492
|
305 |
+
],
|
306 |
+
"precision_ci": [
|
307 |
+
0.833978890287596,
|
308 |
+
0.833978890287596
|
309 |
+
],
|
310 |
+
"recall_ci": [
|
311 |
+
0.9098862642169729,
|
312 |
+
0.9098862642169729
|
313 |
+
],
|
314 |
+
"f1_ci": [
|
315 |
+
0.8702805202104968,
|
316 |
+
0.8702805202104968
|
317 |
+
],
|
318 |
+
"specificity_ci": [
|
319 |
+
0.8188679245282912,
|
320 |
+
0.8188679245282912
|
321 |
+
],
|
322 |
+
"npv_ci": [
|
323 |
+
0.900862976980011,
|
324 |
+
0.900862976980011
|
325 |
+
],
|
326 |
+
"class_weights": {
|
327 |
+
"0.0": 0.6658925979680697,
|
328 |
+
"1.0": 2.0069991251093615
|
329 |
+
}
|
330 |
+
},
|
331 |
+
"identity_hate": {
|
332 |
+
"auc": 0.9686336850292078,
|
333 |
+
"threshold": 0.026042653247714043,
|
334 |
+
"precision": 0.8623651962191886,
|
335 |
+
"recall": 0.9626168224299065,
|
336 |
+
"f1": 0.909737451082551,
|
337 |
+
"specificity": 0.8463648834019236,
|
338 |
+
"npv": 0.9576992819322562,
|
339 |
+
"positive_samples": 214,
|
340 |
+
"true_positives": 2208,
|
341 |
+
"false_positives": 352,
|
342 |
+
"true_negatives": 1941,
|
343 |
+
"false_negatives": 85,
|
344 |
+
"auc_ci": [
|
345 |
+
0.9686336850292078,
|
346 |
+
0.9686336850292078
|
347 |
+
],
|
348 |
+
"precision_ci": [
|
349 |
+
0.8623651962191886,
|
350 |
+
0.8623651962191886
|
351 |
+
],
|
352 |
+
"recall_ci": [
|
353 |
+
0.9626168224299065,
|
354 |
+
0.9626168224299065
|
355 |
+
],
|
356 |
+
"f1_ci": [
|
357 |
+
0.909737451082551,
|
358 |
+
0.909737451082551
|
359 |
+
],
|
360 |
+
"specificity_ci": [
|
361 |
+
0.8463648834019236,
|
362 |
+
0.8463648834019236
|
363 |
+
],
|
364 |
+
"npv_ci": [
|
365 |
+
0.9576992819322562,
|
366 |
+
0.9576992819322562
|
367 |
+
],
|
368 |
+
"class_weights": {
|
369 |
+
"0.0": 0.5244627343392776,
|
370 |
+
"1.0": 10.719626168224298
|
371 |
+
}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"sample_count": 4588
|
375 |
+
},
|
376 |
+
"1": {
|
377 |
+
"auc": 0.9420109561343032,
|
378 |
+
"precision": 0.7054445371054338,
|
379 |
+
"recall": 0.8937771830043493,
|
380 |
+
"f1": 0.7655260008199765,
|
381 |
+
"hamming_loss": 0.16467680852429553,
|
382 |
+
"exact_match": 0.49354900828037745,
|
383 |
+
"specificity": 0.8275039240639036,
|
384 |
+
"class_metrics": {
|
385 |
+
"toxic": {
|
386 |
+
"auc": 0.970066021237747,
|
387 |
+
"threshold": 0.44148319959640503,
|
388 |
+
"precision": 0.9051201281749973,
|
389 |
+
"recall": 0.916216216216217,
|
390 |
+
"f1": 0.910634371966946,
|
391 |
+
"specificity": 0.903956972723781,
|
392 |
+
"npv": 0.9151763423430814,
|
393 |
+
"positive_samples": 2590,
|
394 |
+
"true_positives": 2378,
|
395 |
+
"false_positives": 249,
|
396 |
+
"true_negatives": 2347,
|
397 |
+
"false_negatives": 217,
|
398 |
+
"auc_ci": [
|
399 |
+
0.970066021237747,
|
400 |
+
0.970066021237747
|
401 |
+
],
|
402 |
+
"precision_ci": [
|
403 |
+
0.9051201281749973,
|
404 |
+
0.9051201281749973
|
405 |
+
],
|
406 |
+
"recall_ci": [
|
407 |
+
0.916216216216217,
|
408 |
+
0.916216216216217
|
409 |
+
],
|
410 |
+
"f1_ci": [
|
411 |
+
0.910634371966946,
|
412 |
+
0.910634371966946
|
413 |
+
],
|
414 |
+
"specificity_ci": [
|
415 |
+
0.903956972723781,
|
416 |
+
0.903956972723781
|
417 |
+
],
|
418 |
+
"npv_ci": [
|
419 |
+
0.9151763423430814,
|
420 |
+
0.9151763423430814
|
421 |
+
],
|
422 |
+
"class_weights": {
|
423 |
+
"0.0": 0.9975028812908183,
|
424 |
+
"1.0": 1.0025096525096524
|
425 |
+
}
|
426 |
+
},
|
427 |
+
"severe_toxic": {
|
428 |
+
"auc": 0.9032119421376688,
|
429 |
+
"threshold": 0.03648429363965988,
|
430 |
+
"precision": 0.8147008122253235,
|
431 |
+
"recall": 0.8688524590163955,
|
432 |
+
"f1": 0.8409057392553343,
|
433 |
+
"specificity": 0.8023843200646473,
|
434 |
+
"npv": 0.8595146599106457,
|
435 |
+
"positive_samples": 244,
|
436 |
+
"true_positives": 2255,
|
437 |
+
"false_positives": 513,
|
438 |
+
"true_negatives": 2083,
|
439 |
+
"false_negatives": 340,
|
440 |
+
"auc_ci": [
|
441 |
+
0.9032119421376688,
|
442 |
+
0.9032119421376688
|
443 |
+
],
|
444 |
+
"precision_ci": [
|
445 |
+
0.8147008122253235,
|
446 |
+
0.8147008122253235
|
447 |
+
],
|
448 |
+
"recall_ci": [
|
449 |
+
0.8688524590163955,
|
450 |
+
0.8688524590163955
|
451 |
+
],
|
452 |
+
"f1_ci": [
|
453 |
+
0.8409057392553343,
|
454 |
+
0.8409057392553343
|
455 |
+
],
|
456 |
+
"specificity_ci": [
|
457 |
+
0.8023843200646473,
|
458 |
+
0.8023843200646473
|
459 |
+
],
|
460 |
+
"npv_ci": [
|
461 |
+
0.8595146599106457,
|
462 |
+
0.8595146599106457
|
463 |
+
],
|
464 |
+
"class_weights": {
|
465 |
+
"0.0": 0.5246514447363103,
|
466 |
+
"1.0": 10.64139344262295
|
467 |
+
}
|
468 |
+
},
|
469 |
+
"obscene": {
|
470 |
+
"auc": 0.9387485218400086,
|
471 |
+
"threshold": 0.1990610957145691,
|
472 |
+
"precision": 0.8573644543610149,
|
473 |
+
"recall": 0.8723747980614001,
|
474 |
+
"f1": 0.8648044977770555,
|
475 |
+
"specificity": 0.8548672566371623,
|
476 |
+
"npv": 0.8701005785595336,
|
477 |
+
"positive_samples": 1238,
|
478 |
+
"true_positives": 2265,
|
479 |
+
"false_positives": 376,
|
480 |
+
"true_negatives": 2219,
|
481 |
+
"false_negatives": 331,
|
482 |
+
"auc_ci": [
|
483 |
+
0.9387485218400086,
|
484 |
+
0.9387485218400086
|
485 |
+
],
|
486 |
+
"precision_ci": [
|
487 |
+
0.8573644543610149,
|
488 |
+
0.8573644543610149
|
489 |
+
],
|
490 |
+
"recall_ci": [
|
491 |
+
0.8723747980614001,
|
492 |
+
0.8723747980614001
|
493 |
+
],
|
494 |
+
"f1_ci": [
|
495 |
+
0.8648044977770555,
|
496 |
+
0.8648044977770555
|
497 |
+
],
|
498 |
+
"specificity_ci": [
|
499 |
+
0.8548672566371623,
|
500 |
+
0.8548672566371623
|
501 |
+
],
|
502 |
+
"npv_ci": [
|
503 |
+
0.8701005785595336,
|
504 |
+
0.8701005785595336
|
505 |
+
],
|
506 |
+
"class_weights": {
|
507 |
+
"0.0": 0.6565107458912769,
|
508 |
+
"1.0": 2.097334410339257
|
509 |
+
}
|
510 |
+
},
|
511 |
+
"threat": {
|
512 |
+
"auc": 0.930141945247047,
|
513 |
+
"threshold": 0.012619060464203358,
|
514 |
+
"precision": 0.8505847769217403,
|
515 |
+
"recall": 0.8773584905660369,
|
516 |
+
"f1": 0.8637642103418028,
|
517 |
+
"specificity": 0.8458816591311225,
|
518 |
+
"npv": 0.8733726632315268,
|
519 |
+
"positive_samples": 106,
|
520 |
+
"true_positives": 2278,
|
521 |
+
"false_positives": 400,
|
522 |
+
"true_negatives": 2196,
|
523 |
+
"false_negatives": 318,
|
524 |
+
"auc_ci": [
|
525 |
+
0.930141945247047,
|
526 |
+
0.930141945247047
|
527 |
+
],
|
528 |
+
"precision_ci": [
|
529 |
+
0.8505847769217403,
|
530 |
+
0.8505847769217403
|
531 |
+
],
|
532 |
+
"recall_ci": [
|
533 |
+
0.8773584905660369,
|
534 |
+
0.8773584905660369
|
535 |
+
],
|
536 |
+
"f1_ci": [
|
537 |
+
0.8637642103418028,
|
538 |
+
0.8637642103418028
|
539 |
+
],
|
540 |
+
"specificity_ci": [
|
541 |
+
0.8458816591311225,
|
542 |
+
0.8458816591311225
|
543 |
+
],
|
544 |
+
"npv_ci": [
|
545 |
+
0.8733726632315268,
|
546 |
+
0.8733726632315268
|
547 |
+
],
|
548 |
+
"class_weights": {
|
549 |
+
"0.0": 0.5104187143699627,
|
550 |
+
"1.0": 24.495283018867923
|
551 |
+
}
|
552 |
+
},
|
553 |
+
"insult": {
|
554 |
+
"auc": 0.9116567628368878,
|
555 |
+
"threshold": 0.24214455485343933,
|
556 |
+
"precision": 0.8063856025869378,
|
557 |
+
"recall": 0.8794466403162026,
|
558 |
+
"f1": 0.8413329522908936,
|
559 |
+
"specificity": 0.7888435374149729,
|
560 |
+
"npv": 0.8674359236672227,
|
561 |
+
"positive_samples": 1518,
|
562 |
+
"true_positives": 2283,
|
563 |
+
"false_positives": 548,
|
564 |
+
"true_negatives": 2048,
|
565 |
+
"false_negatives": 313,
|
566 |
+
"auc_ci": [
|
567 |
+
0.9116567628368878,
|
568 |
+
0.9116567628368878
|
569 |
+
],
|
570 |
+
"precision_ci": [
|
571 |
+
0.8063856025869378,
|
572 |
+
0.8063856025869378
|
573 |
+
],
|
574 |
+
"recall_ci": [
|
575 |
+
0.8794466403162026,
|
576 |
+
0.8794466403162026
|
577 |
+
],
|
578 |
+
"f1_ci": [
|
579 |
+
0.8413329522908936,
|
580 |
+
0.8413329522908936
|
581 |
+
],
|
582 |
+
"specificity_ci": [
|
583 |
+
0.7888435374149729,
|
584 |
+
0.7888435374149729
|
585 |
+
],
|
586 |
+
"npv_ci": [
|
587 |
+
0.8674359236672227,
|
588 |
+
0.8674359236672227
|
589 |
+
],
|
590 |
+
"class_weights": {
|
591 |
+
"0.0": 0.706530612244898,
|
592 |
+
"1.0": 1.7104743083003953
|
593 |
+
}
|
594 |
+
},
|
595 |
+
"identity_hate": {
|
596 |
+
"auc": 0.9000925697269513,
|
597 |
+
"threshold": 0.03167847916483879,
|
598 |
+
"precision": 0.7933569321076599,
|
599 |
+
"recall": 0.8865248226950354,
|
600 |
+
"f1": 0.8373572860825882,
|
601 |
+
"specificity": 0.7690897984117396,
|
602 |
+
"npv": 0.8714256962068888,
|
603 |
+
"positive_samples": 282,
|
604 |
+
"true_positives": 2301,
|
605 |
+
"false_positives": 599,
|
606 |
+
"true_negatives": 1996,
|
607 |
+
"false_negatives": 294,
|
608 |
+
"auc_ci": [
|
609 |
+
0.9000925697269513,
|
610 |
+
0.9000925697269513
|
611 |
+
],
|
612 |
+
"precision_ci": [
|
613 |
+
0.7933569321076599,
|
614 |
+
0.7933569321076599
|
615 |
+
],
|
616 |
+
"recall_ci": [
|
617 |
+
0.8865248226950354,
|
618 |
+
0.8865248226950354
|
619 |
+
],
|
620 |
+
"f1_ci": [
|
621 |
+
0.8373572860825882,
|
622 |
+
0.8373572860825882
|
623 |
+
],
|
624 |
+
"specificity_ci": [
|
625 |
+
0.7690897984117396,
|
626 |
+
0.7690897984117396
|
627 |
+
],
|
628 |
+
"npv_ci": [
|
629 |
+
0.8714256962068888,
|
630 |
+
0.8714256962068888
|
631 |
+
],
|
632 |
+
"class_weights": {
|
633 |
+
"0.0": 0.5287110568112401,
|
634 |
+
"1.0": 9.207446808510639
|
635 |
+
}
|
636 |
+
}
|
637 |
+
},
|
638 |
+
"sample_count": 5193
|
639 |
+
},
|
640 |
+
"2": {
|
641 |
+
"auc": 0.9291857688264461,
|
642 |
+
"precision": 0.6563281876729908,
|
643 |
+
"recall": 0.9071871335232032,
|
644 |
+
"f1": 0.7348671832220326,
|
645 |
+
"hamming_loss": 0.20595261153076377,
|
646 |
+
"exact_match": 0.4263025372845245,
|
647 |
+
"specificity": 0.7733622212755961,
|
648 |
+
"class_metrics": {
|
649 |
+
"toxic": {
|
650 |
+
"auc": 0.962186696069825,
|
651 |
+
"threshold": 0.3978160321712494,
|
652 |
+
"precision": 0.8937958373522624,
|
653 |
+
"recall": 0.9136996904024615,
|
654 |
+
"f1": 0.9036381748465286,
|
655 |
+
"specificity": 0.8914307871267977,
|
656 |
+
"npv": 0.9117341057406776,
|
657 |
+
"positive_samples": 2584,
|
658 |
+
"true_positives": 2358,
|
659 |
+
"false_positives": 280,
|
660 |
+
"true_negatives": 2301,
|
661 |
+
"false_negatives": 222,
|
662 |
+
"auc_ci": [
|
663 |
+
0.962186696069825,
|
664 |
+
0.962186696069825
|
665 |
+
],
|
666 |
+
"precision_ci": [
|
667 |
+
0.8937958373522624,
|
668 |
+
0.8937958373522624
|
669 |
+
],
|
670 |
+
"recall_ci": [
|
671 |
+
0.9136996904024615,
|
672 |
+
0.9136996904024615
|
673 |
+
],
|
674 |
+
"f1_ci": [
|
675 |
+
0.9036381748465286,
|
676 |
+
0.9036381748465286
|
677 |
+
],
|
678 |
+
"specificity_ci": [
|
679 |
+
0.8914307871267977,
|
680 |
+
0.8914307871267977
|
681 |
+
],
|
682 |
+
"npv_ci": [
|
683 |
+
0.9117341057406776,
|
684 |
+
0.9117341057406776
|
685 |
+
],
|
686 |
+
"class_weights": {
|
687 |
+
"0.0": 1.0009693679720821,
|
688 |
+
"1.0": 0.9990325077399381
|
689 |
+
}
|
690 |
+
},
|
691 |
+
"severe_toxic": {
|
692 |
+
"auc": 0.890519864426667,
|
693 |
+
"threshold": 0.015000982210040092,
|
694 |
+
"precision": 0.7460680730510791,
|
695 |
+
"recall": 0.918032786885247,
|
696 |
+
"f1": 0.8231651924456013,
|
697 |
+
"specificity": 0.6875381175035498,
|
698 |
+
"npv": 0.8934806428840502,
|
699 |
+
"positive_samples": 244,
|
700 |
+
"true_positives": 2369,
|
701 |
+
"false_positives": 806,
|
702 |
+
"true_negatives": 1774,
|
703 |
+
"false_negatives": 211,
|
704 |
+
"auc_ci": [
|
705 |
+
0.890519864426667,
|
706 |
+
0.890519864426667
|
707 |
+
],
|
708 |
+
"precision_ci": [
|
709 |
+
0.7460680730510791,
|
710 |
+
0.7460680730510791
|
711 |
+
],
|
712 |
+
"recall_ci": [
|
713 |
+
0.918032786885247,
|
714 |
+
0.918032786885247
|
715 |
+
],
|
716 |
+
"f1_ci": [
|
717 |
+
0.8231651924456013,
|
718 |
+
0.8231651924456013
|
719 |
+
],
|
720 |
+
"specificity_ci": [
|
721 |
+
0.6875381175035498,
|
722 |
+
0.6875381175035498
|
723 |
+
],
|
724 |
+
"npv_ci": [
|
725 |
+
0.8934806428840502,
|
726 |
+
0.8934806428840502
|
727 |
+
],
|
728 |
+
"class_weights": {
|
729 |
+
"0.0": 0.5248017889815003,
|
730 |
+
"1.0": 10.579918032786885
|
731 |
+
}
|
732 |
+
},
|
733 |
+
"obscene": {
|
734 |
+
"auc": 0.9233059279915251,
|
735 |
+
"threshold": 0.11362762749195099,
|
736 |
+
"precision": 0.7873800414823968,
|
737 |
+
"recall": 0.9095315024232634,
|
738 |
+
"f1": 0.8440592612850891,
|
739 |
+
"specificity": 0.7543949044586057,
|
740 |
+
"npv": 0.892919379205219,
|
741 |
+
"positive_samples": 1238,
|
742 |
+
"true_positives": 2347,
|
743 |
+
"false_positives": 634,
|
744 |
+
"true_negatives": 1947,
|
745 |
+
"false_negatives": 233,
|
746 |
+
"auc_ci": [
|
747 |
+
0.9233059279915251,
|
748 |
+
0.9233059279915251
|
749 |
+
],
|
750 |
+
"precision_ci": [
|
751 |
+
0.7873800414823968,
|
752 |
+
0.7873800414823968
|
753 |
+
],
|
754 |
+
"recall_ci": [
|
755 |
+
0.9095315024232634,
|
756 |
+
0.9095315024232634
|
757 |
+
],
|
758 |
+
"f1_ci": [
|
759 |
+
0.8440592612850891,
|
760 |
+
0.8440592612850891
|
761 |
+
],
|
762 |
+
"specificity_ci": [
|
763 |
+
0.7543949044586057,
|
764 |
+
0.7543949044586057
|
765 |
+
],
|
766 |
+
"npv_ci": [
|
767 |
+
0.892919379205219,
|
768 |
+
0.892919379205219
|
769 |
+
],
|
770 |
+
"class_weights": {
|
771 |
+
"0.0": 0.6577070063694268,
|
772 |
+
"1.0": 2.0852180936995155
|
773 |
+
}
|
774 |
+
},
|
775 |
+
"threat": {
|
776 |
+
"auc": 0.848578598380765,
|
777 |
+
"threshold": 0.008195769973099232,
|
778 |
+
"precision": 0.7785886139481758,
|
779 |
+
"recall": 0.8055555555555555,
|
780 |
+
"f1": 0.791842555156752,
|
781 |
+
"specificity": 0.7709198813056214,
|
782 |
+
"npv": 0.7985792107105536,
|
783 |
+
"positive_samples": 108,
|
784 |
+
"true_positives": 2079,
|
785 |
+
"false_positives": 591,
|
786 |
+
"true_negatives": 1990,
|
787 |
+
"false_negatives": 501,
|
788 |
+
"auc_ci": [
|
789 |
+
0.848578598380765,
|
790 |
+
0.848578598380765
|
791 |
+
],
|
792 |
+
"precision_ci": [
|
793 |
+
0.7785886139481758,
|
794 |
+
0.7785886139481758
|
795 |
+
],
|
796 |
+
"recall_ci": [
|
797 |
+
0.8055555555555555,
|
798 |
+
0.8055555555555555
|
799 |
+
],
|
800 |
+
"f1_ci": [
|
801 |
+
0.791842555156752,
|
802 |
+
0.791842555156752
|
803 |
+
],
|
804 |
+
"specificity_ci": [
|
805 |
+
0.7709198813056214,
|
806 |
+
0.7709198813056214
|
807 |
+
],
|
808 |
+
"npv_ci": [
|
809 |
+
0.7985792107105536,
|
810 |
+
0.7985792107105536
|
811 |
+
],
|
812 |
+
"class_weights": {
|
813 |
+
"0.0": 0.5106824925816024,
|
814 |
+
"1.0": 23.90277777777778
|
815 |
+
}
|
816 |
+
},
|
817 |
+
"insult": {
|
818 |
+
"auc": 0.8943137096607889,
|
819 |
+
"threshold": 0.1587354838848114,
|
820 |
+
"precision": 0.7484673378377763,
|
821 |
+
"recall": 0.9141347424042362,
|
822 |
+
"f1": 0.8230472043830551,
|
823 |
+
"specificity": 0.6927925459029957,
|
824 |
+
"npv": 0.889726581805318,
|
825 |
+
"positive_samples": 1514,
|
826 |
+
"true_positives": 2359,
|
827 |
+
"false_positives": 793,
|
828 |
+
"true_negatives": 1788,
|
829 |
+
"false_negatives": 221,
|
830 |
+
"auc_ci": [
|
831 |
+
0.8943137096607889,
|
832 |
+
0.8943137096607889
|
833 |
+
],
|
834 |
+
"precision_ci": [
|
835 |
+
0.7484673378377763,
|
836 |
+
0.7484673378377763
|
837 |
+
],
|
838 |
+
"recall_ci": [
|
839 |
+
0.9141347424042362,
|
840 |
+
0.9141347424042362
|
841 |
+
],
|
842 |
+
"f1_ci": [
|
843 |
+
0.8230472043830551,
|
844 |
+
0.8230472043830551
|
845 |
+
],
|
846 |
+
"specificity_ci": [
|
847 |
+
0.6927925459029957,
|
848 |
+
0.6927925459029957
|
849 |
+
],
|
850 |
+
"npv_ci": [
|
851 |
+
0.889726581805318,
|
852 |
+
0.889726581805318
|
853 |
+
],
|
854 |
+
"class_weights": {
|
855 |
+
"0.0": 0.7074540970128802,
|
856 |
+
"1.0": 1.7050858652575958
|
857 |
+
}
|
858 |
+
},
|
859 |
+
"identity_hate": {
|
860 |
+
"auc": 0.9040654827596841,
|
861 |
+
"threshold": 0.0467526838183403,
|
862 |
+
"precision": 0.8408828817107497,
|
863 |
+
"recall": 0.8291814946619218,
|
864 |
+
"f1": 0.8349911950184066,
|
865 |
+
"specificity": 0.8430970913560043,
|
866 |
+
"npv": 0.8315259121222329,
|
867 |
+
"positive_samples": 281,
|
868 |
+
"true_positives": 2140,
|
869 |
+
"false_positives": 405,
|
870 |
+
"true_negatives": 2176,
|
871 |
+
"false_negatives": 440,
|
872 |
+
"auc_ci": [
|
873 |
+
0.9040654827596841,
|
874 |
+
0.9040654827596841
|
875 |
+
],
|
876 |
+
"precision_ci": [
|
877 |
+
0.8408828817107497,
|
878 |
+
0.8408828817107497
|
879 |
+
],
|
880 |
+
"recall_ci": [
|
881 |
+
0.8291814946619218,
|
882 |
+
0.8291814946619218
|
883 |
+
],
|
884 |
+
"f1_ci": [
|
885 |
+
0.8349911950184066,
|
886 |
+
0.8349911950184066
|
887 |
+
],
|
888 |
+
"specificity_ci": [
|
889 |
+
0.8430970913560043,
|
890 |
+
0.8430970913560043
|
891 |
+
],
|
892 |
+
"npv_ci": [
|
893 |
+
0.8315259121222329,
|
894 |
+
0.8315259121222329
|
895 |
+
],
|
896 |
+
"class_weights": {
|
897 |
+
"0.0": 0.5287791888570258,
|
898 |
+
"1.0": 9.186832740213523
|
899 |
+
}
|
900 |
+
}
|
901 |
+
},
|
902 |
+
"sample_count": 5163
|
903 |
+
},
|
904 |
+
"3": {
|
905 |
+
"auc": 0.9472472410532857,
|
906 |
+
"precision": 0.6982701786686969,
|
907 |
+
"recall": 0.9152656355077337,
|
908 |
+
"f1": 0.7674148586410611,
|
909 |
+
"hamming_loss": 0.1731811145510836,
|
910 |
+
"exact_match": 0.48471362229102166,
|
911 |
+
"specificity": 0.8133241121366614,
|
912 |
+
"class_metrics": {
|
913 |
+
"toxic": {
|
914 |
+
"auc": 0.9747483574660619,
|
915 |
+
"threshold": 0.5033379793167114,
|
916 |
+
"precision": 0.9204374197691823,
|
917 |
+
"recall": 0.9294300116324036,
|
918 |
+
"f1": 0.9249118582673775,
|
919 |
+
"specificity": 0.9196601004248757,
|
920 |
+
"npv": 0.9287337466652424,
|
921 |
+
"positive_samples": 2579,
|
922 |
+
"true_positives": 2401,
|
923 |
+
"false_positives": 207,
|
924 |
+
"true_negatives": 2376,
|
925 |
+
"false_negatives": 182,
|
926 |
+
"auc_ci": [
|
927 |
+
0.9747483574660619,
|
928 |
+
0.9747483574660619
|
929 |
+
],
|
930 |
+
"precision_ci": [
|
931 |
+
0.9204374197691823,
|
932 |
+
0.9204374197691823
|
933 |
+
],
|
934 |
+
"recall_ci": [
|
935 |
+
0.9294300116324036,
|
936 |
+
0.9294300116324036
|
937 |
+
],
|
938 |
+
"f1_ci": [
|
939 |
+
0.9249118582673775,
|
940 |
+
0.9249118582673775
|
941 |
+
],
|
942 |
+
"specificity_ci": [
|
943 |
+
0.9196601004248757,
|
944 |
+
0.9196601004248757
|
945 |
+
],
|
946 |
+
"npv_ci": [
|
947 |
+
0.9287337466652424,
|
948 |
+
0.9287337466652424
|
949 |
+
],
|
950 |
+
"class_weights": {
|
951 |
+
"0.0": 0.9980687524140595,
|
952 |
+
"1.0": 1.0019387359441645
|
953 |
+
}
|
954 |
+
},
|
955 |
+
"severe_toxic": {
|
956 |
+
"auc": 0.9073687265747961,
|
957 |
+
"threshold": 0.021415209397673607,
|
958 |
+
"precision": 0.7618540559183846,
|
959 |
+
"recall": 0.93388429752066,
|
960 |
+
"f1": 0.8391430651806406,
|
961 |
+
"specificity": 0.7080795777506993,
|
962 |
+
"npv": 0.9146007419992344,
|
963 |
+
"positive_samples": 242,
|
964 |
+
"true_positives": 2413,
|
965 |
+
"false_positives": 754,
|
966 |
+
"true_negatives": 1829,
|
967 |
+
"false_negatives": 170,
|
968 |
+
"auc_ci": [
|
969 |
+
0.9073687265747961,
|
970 |
+
0.9073687265747961
|
971 |
+
],
|
972 |
+
"precision_ci": [
|
973 |
+
0.7618540559183846,
|
974 |
+
0.7618540559183846
|
975 |
+
],
|
976 |
+
"recall_ci": [
|
977 |
+
0.93388429752066,
|
978 |
+
0.93388429752066
|
979 |
+
],
|
980 |
+
"f1_ci": [
|
981 |
+
0.8391430651806406,
|
982 |
+
0.8391430651806406
|
983 |
+
],
|
984 |
+
"specificity_ci": [
|
985 |
+
0.7080795777506993,
|
986 |
+
0.7080795777506993
|
987 |
+
],
|
988 |
+
"npv_ci": [
|
989 |
+
0.9146007419992344,
|
990 |
+
0.9146007419992344
|
991 |
+
],
|
992 |
+
"class_weights": {
|
993 |
+
"0.0": 0.5245635403978888,
|
994 |
+
"1.0": 10.677685950413224
|
995 |
+
}
|
996 |
+
},
|
997 |
+
"obscene": {
|
998 |
+
"auc": 0.9429228614622618,
|
999 |
+
"threshold": 0.14896434545516968,
|
1000 |
+
"precision": 0.822101549733319,
|
1001 |
+
"recall": 0.9148418491484125,
|
1002 |
+
"f1": 0.8659958665665364,
|
1003 |
+
"specificity": 0.8020330368488026,
|
1004 |
+
"npv": 0.9040137548341648,
|
1005 |
+
"positive_samples": 1233,
|
1006 |
+
"true_positives": 2363,
|
1007 |
+
"false_positives": 511,
|
1008 |
+
"true_negatives": 2072,
|
1009 |
+
"false_negatives": 220,
|
1010 |
+
"auc_ci": [
|
1011 |
+
0.9429228614622618,
|
1012 |
+
0.9429228614622618
|
1013 |
+
],
|
1014 |
+
"precision_ci": [
|
1015 |
+
0.822101549733319,
|
1016 |
+
0.822101549733319
|
1017 |
+
],
|
1018 |
+
"recall_ci": [
|
1019 |
+
0.9148418491484125,
|
1020 |
+
0.9148418491484125
|
1021 |
+
],
|
1022 |
+
"f1_ci": [
|
1023 |
+
0.8659958665665364,
|
1024 |
+
0.8659958665665364
|
1025 |
+
],
|
1026 |
+
"specificity_ci": [
|
1027 |
+
0.8020330368488026,
|
1028 |
+
0.8020330368488026
|
1029 |
+
],
|
1030 |
+
"npv_ci": [
|
1031 |
+
0.9040137548341648,
|
1032 |
+
0.9040137548341648
|
1033 |
+
],
|
1034 |
+
"class_weights": {
|
1035 |
+
"0.0": 0.6566709021601016,
|
1036 |
+
"1.0": 2.095701540957015
|
1037 |
+
}
|
1038 |
+
},
|
1039 |
+
"threat": {
|
1040 |
+
"auc": 0.8985232762406729,
|
1041 |
+
"threshold": 0.013273251242935658,
|
1042 |
+
"precision": 0.8299773755655987,
|
1043 |
+
"recall": 0.8055555555555544,
|
1044 |
+
"f1": 0.8175841319366995,
|
1045 |
+
"specificity": 0.8349802371541444,
|
1046 |
+
"npv": 0.8111134812286639,
|
1047 |
+
"positive_samples": 108,
|
1048 |
+
"true_positives": 2081,
|
1049 |
+
"false_positives": 426,
|
1050 |
+
"true_negatives": 2157,
|
1051 |
+
"false_negatives": 502,
|
1052 |
+
"auc_ci": [
|
1053 |
+
0.8985232762406729,
|
1054 |
+
0.8985232762406729
|
1055 |
+
],
|
1056 |
+
"precision_ci": [
|
1057 |
+
0.8299773755655987,
|
1058 |
+
0.8299773755655987
|
1059 |
+
],
|
1060 |
+
"recall_ci": [
|
1061 |
+
0.8055555555555544,
|
1062 |
+
0.8055555555555544
|
1063 |
+
],
|
1064 |
+
"f1_ci": [
|
1065 |
+
0.8175841319366995,
|
1066 |
+
0.8175841319366995
|
1067 |
+
],
|
1068 |
+
"specificity_ci": [
|
1069 |
+
0.8349802371541444,
|
1070 |
+
0.8349802371541444
|
1071 |
+
],
|
1072 |
+
"npv_ci": [
|
1073 |
+
0.8111134812286639,
|
1074 |
+
0.8111134812286639
|
1075 |
+
],
|
1076 |
+
"class_weights": {
|
1077 |
+
"0.0": 0.5106719367588933,
|
1078 |
+
"1.0": 23.925925925925927
|
1079 |
+
}
|
1080 |
+
},
|
1081 |
+
"insult": {
|
1082 |
+
"auc": 0.9178884966596437,
|
1083 |
+
"threshold": 0.22368550300598145,
|
1084 |
+
"precision": 0.8017937840347082,
|
1085 |
+
"recall": 0.9065606361828928,
|
1086 |
+
"f1": 0.8509647346472855,
|
1087 |
+
"specificity": 0.7758950532932412,
|
1088 |
+
"npv": 0.8925162032262658,
|
1089 |
+
"positive_samples": 1509,
|
1090 |
+
"true_positives": 2342,
|
1091 |
+
"false_positives": 579,
|
1092 |
+
"true_negatives": 2004,
|
1093 |
+
"false_negatives": 241,
|
1094 |
+
"auc_ci": [
|
1095 |
+
0.9178884966596437,
|
1096 |
+
0.9178884966596437
|
1097 |
+
],
|
1098 |
+
"precision_ci": [
|
1099 |
+
0.8017937840347082,
|
1100 |
+
0.8017937840347082
|
1101 |
+
],
|
1102 |
+
"recall_ci": [
|
1103 |
+
0.9065606361828928,
|
1104 |
+
0.9065606361828928
|
1105 |
+
],
|
1106 |
+
"f1_ci": [
|
1107 |
+
0.8509647346472855,
|
1108 |
+
0.8509647346472855
|
1109 |
+
],
|
1110 |
+
"specificity_ci": [
|
1111 |
+
0.7758950532932412,
|
1112 |
+
0.7758950532932412
|
1113 |
+
],
|
1114 |
+
"npv_ci": [
|
1115 |
+
0.8925162032262658,
|
1116 |
+
0.8925162032262658
|
1117 |
+
],
|
1118 |
+
"class_weights": {
|
1119 |
+
"0.0": 0.70620388084176,
|
1120 |
+
"1.0": 1.7123923127899272
|
1121 |
+
}
|
1122 |
+
},
|
1123 |
+
"identity_hate": {
|
1124 |
+
"auc": 0.9242209406948756,
|
1125 |
+
"threshold": 0.042373284697532654,
|
1126 |
+
"precision": 0.8424336725093711,
|
1127 |
+
"recall": 0.8592057761732879,
|
1128 |
+
"f1": 0.8507370677416805,
|
1129 |
+
"specificity": 0.839296667348186,
|
1130 |
+
"npv": 0.8563457480377756,
|
1131 |
+
"positive_samples": 277,
|
1132 |
+
"true_positives": 2220,
|
1133 |
+
"false_positives": 415,
|
1134 |
+
"true_negatives": 2168,
|
1135 |
+
"false_negatives": 363,
|
1136 |
+
"auc_ci": [
|
1137 |
+
0.9242209406948756,
|
1138 |
+
0.9242209406948756
|
1139 |
+
],
|
1140 |
+
"precision_ci": [
|
1141 |
+
0.8424336725093711,
|
1142 |
+
0.8424336725093711
|
1143 |
+
],
|
1144 |
+
"recall_ci": [
|
1145 |
+
0.8592057761732879,
|
1146 |
+
0.8592057761732879
|
1147 |
+
],
|
1148 |
+
"f1_ci": [
|
1149 |
+
0.8507370677416805,
|
1150 |
+
0.8507370677416805
|
1151 |
+
],
|
1152 |
+
"specificity_ci": [
|
1153 |
+
0.839296667348186,
|
1154 |
+
0.839296667348186
|
1155 |
+
],
|
1156 |
+
"npv_ci": [
|
1157 |
+
0.8563457480377756,
|
1158 |
+
0.8563457480377756
|
1159 |
+
],
|
1160 |
+
"class_weights": {
|
1161 |
+
"0.0": 0.5283173175219792,
|
1162 |
+
"1.0": 9.328519855595667
|
1163 |
+
}
|
1164 |
+
}
|
1165 |
+
},
|
1166 |
+
"sample_count": 5168
|
1167 |
+
},
|
1168 |
+
"4": {
|
1169 |
+
"auc": 0.9418392933687934,
|
1170 |
+
"precision": 0.7019672150256779,
|
1171 |
+
"recall": 0.9036673990197736,
|
1172 |
+
"f1": 0.766375554274002,
|
1173 |
+
"hamming_loss": 0.1651803024428073,
|
1174 |
+
"exact_match": 0.4955409073284219,
|
1175 |
+
"specificity": 0.8245338509682739,
|
1176 |
+
"class_metrics": {
|
1177 |
+
"toxic": {
|
1178 |
+
"auc": 0.9718317503718501,
|
1179 |
+
"threshold": 0.4544762372970581,
|
1180 |
+
"precision": 0.9205380327767301,
|
1181 |
+
"recall": 0.9217594394705978,
|
1182 |
+
"f1": 0.9211483312394544,
|
1183 |
+
"specificity": 0.9204325994592514,
|
1184 |
+
"npv": 0.9216554888385321,
|
1185 |
+
"positive_samples": 2569,
|
1186 |
+
"true_positives": 2377,
|
1187 |
+
"false_positives": 205,
|
1188 |
+
"true_negatives": 2373,
|
1189 |
+
"false_negatives": 201,
|
1190 |
+
"auc_ci": [
|
1191 |
+
0.9718317503718501,
|
1192 |
+
0.9718317503718501
|
1193 |
+
],
|
1194 |
+
"precision_ci": [
|
1195 |
+
0.9205380327767301,
|
1196 |
+
0.9205380327767301
|
1197 |
+
],
|
1198 |
+
"recall_ci": [
|
1199 |
+
0.9217594394705978,
|
1200 |
+
0.9217594394705978
|
1201 |
+
],
|
1202 |
+
"f1_ci": [
|
1203 |
+
0.9211483312394544,
|
1204 |
+
0.9211483312394544
|
1205 |
+
],
|
1206 |
+
"specificity_ci": [
|
1207 |
+
0.9204325994592514,
|
1208 |
+
0.9204325994592514
|
1209 |
+
],
|
1210 |
+
"npv_ci": [
|
1211 |
+
0.9216554888385321,
|
1212 |
+
0.9216554888385321
|
1213 |
+
],
|
1214 |
+
"class_weights": {
|
1215 |
+
"0.0": 0.9961375048281189,
|
1216 |
+
"1.0": 1.003892565200467
|
1217 |
+
}
|
1218 |
+
},
|
1219 |
+
"severe_toxic": {
|
1220 |
+
"auc": 0.8962662667751142,
|
1221 |
+
"threshold": 0.0307308342307806,
|
1222 |
+
"precision": 0.7913182428501319,
|
1223 |
+
"recall": 0.8458333333333329,
|
1224 |
+
"f1": 0.8176681460830066,
|
1225 |
+
"specificity": 0.7769418462789687,
|
1226 |
+
"npv": 0.834426745622858,
|
1227 |
+
"positive_samples": 240,
|
1228 |
+
"true_positives": 2181,
|
1229 |
+
"false_positives": 575,
|
1230 |
+
"true_negatives": 2003,
|
1231 |
+
"false_negatives": 397,
|
1232 |
+
"auc_ci": [
|
1233 |
+
0.8962662667751142,
|
1234 |
+
0.8962662667751142
|
1235 |
+
],
|
1236 |
+
"precision_ci": [
|
1237 |
+
0.7913182428501319,
|
1238 |
+
0.7913182428501319
|
1239 |
+
],
|
1240 |
+
"recall_ci": [
|
1241 |
+
0.8458333333333329,
|
1242 |
+
0.8458333333333329
|
1243 |
+
],
|
1244 |
+
"f1_ci": [
|
1245 |
+
0.8176681460830066,
|
1246 |
+
0.8176681460830066
|
1247 |
+
],
|
1248 |
+
"specificity_ci": [
|
1249 |
+
0.7769418462789687,
|
1250 |
+
0.7769418462789687
|
1251 |
+
],
|
1252 |
+
"npv_ci": [
|
1253 |
+
0.834426745622858,
|
1254 |
+
0.834426745622858
|
1255 |
+
],
|
1256 |
+
"class_weights": {
|
1257 |
+
"0.0": 0.5244001626677511,
|
1258 |
+
"1.0": 10.745833333333334
|
1259 |
+
}
|
1260 |
+
},
|
1261 |
+
"obscene": {
|
1262 |
+
"auc": 0.9401245966951454,
|
1263 |
+
"threshold": 0.1775909662246704,
|
1264 |
+
"precision": 0.8495468615216861,
|
1265 |
+
"recall": 0.8913398692810475,
|
1266 |
+
"f1": 0.8699417085541208,
|
1267 |
+
"specificity": 0.8421453990848948,
|
1268 |
+
"npv": 0.8857178178787266,
|
1269 |
+
"positive_samples": 1224,
|
1270 |
+
"true_positives": 2298,
|
1271 |
+
"false_positives": 407,
|
1272 |
+
"true_negatives": 2171,
|
1273 |
+
"false_negatives": 280,
|
1274 |
+
"auc_ci": [
|
1275 |
+
0.9401245966951454,
|
1276 |
+
0.9401245966951454
|
1277 |
+
],
|
1278 |
+
"precision_ci": [
|
1279 |
+
0.8495468615216861,
|
1280 |
+
0.8495468615216861
|
1281 |
+
],
|
1282 |
+
"recall_ci": [
|
1283 |
+
0.8913398692810475,
|
1284 |
+
0.8913398692810475
|
1285 |
+
],
|
1286 |
+
"f1_ci": [
|
1287 |
+
0.8699417085541208,
|
1288 |
+
0.8699417085541208
|
1289 |
+
],
|
1290 |
+
"specificity_ci": [
|
1291 |
+
0.8421453990848948,
|
1292 |
+
0.8421453990848948
|
1293 |
+
],
|
1294 |
+
"npv_ci": [
|
1295 |
+
0.8857178178787266,
|
1296 |
+
0.8857178178787266
|
1297 |
+
],
|
1298 |
+
"class_weights": {
|
1299 |
+
"0.0": 0.6555668530757499,
|
1300 |
+
"1.0": 2.1070261437908497
|
1301 |
+
}
|
1302 |
+
},
|
1303 |
+
"threat": {
|
1304 |
+
"auc": 0.8861722579224652,
|
1305 |
+
"threshold": 0.014509523287415504,
|
1306 |
+
"precision": 0.841106024006686,
|
1307 |
+
"recall": 0.7943925233644874,
|
1308 |
+
"f1": 0.81708215259711,
|
1309 |
+
"specificity": 0.8499307067907416,
|
1310 |
+
"npv": 0.8052107636996033,
|
1311 |
+
"positive_samples": 107,
|
1312 |
+
"true_positives": 2048,
|
1313 |
+
"false_positives": 387,
|
1314 |
+
"true_negatives": 2191,
|
1315 |
+
"false_negatives": 530,
|
1316 |
+
"auc_ci": [
|
1317 |
+
0.8861722579224652,
|
1318 |
+
0.8861722579224652
|
1319 |
+
],
|
1320 |
+
"precision_ci": [
|
1321 |
+
0.841106024006686,
|
1322 |
+
0.841106024006686
|
1323 |
+
],
|
1324 |
+
"recall_ci": [
|
1325 |
+
0.7943925233644874,
|
1326 |
+
0.7943925233644874
|
1327 |
+
],
|
1328 |
+
"f1_ci": [
|
1329 |
+
0.81708215259711,
|
1330 |
+
0.81708215259711
|
1331 |
+
],
|
1332 |
+
"specificity_ci": [
|
1333 |
+
0.8499307067907416,
|
1334 |
+
0.8499307067907416
|
1335 |
+
],
|
1336 |
+
"npv_ci": [
|
1337 |
+
0.8052107636996033,
|
1338 |
+
0.8052107636996033
|
1339 |
+
],
|
1340 |
+
"class_weights": {
|
1341 |
+
"0.0": 0.5105919619877252,
|
1342 |
+
"1.0": 24.102803738317757
|
1343 |
+
}
|
1344 |
+
},
|
1345 |
+
"insult": {
|
1346 |
+
"auc": 0.908347099690273,
|
1347 |
+
"threshold": 0.19917058944702148,
|
1348 |
+
"precision": 0.787211545222267,
|
1349 |
+
"recall": 0.9028609447771131,
|
1350 |
+
"f1": 0.8410793781503274,
|
1351 |
+
"specificity": 0.755950752393989,
|
1352 |
+
"npv": 0.8861326740097348,
|
1353 |
+
"positive_samples": 1503,
|
1354 |
+
"true_positives": 2328,
|
1355 |
+
"false_positives": 629,
|
1356 |
+
"true_negatives": 1949,
|
1357 |
+
"false_negatives": 250,
|
1358 |
+
"auc_ci": [
|
1359 |
+
0.908347099690273,
|
1360 |
+
0.908347099690273
|
1361 |
+
],
|
1362 |
+
"precision_ci": [
|
1363 |
+
0.787211545222267,
|
1364 |
+
0.787211545222267
|
1365 |
+
],
|
1366 |
+
"recall_ci": [
|
1367 |
+
0.9028609447771131,
|
1368 |
+
0.9028609447771131
|
1369 |
+
],
|
1370 |
+
"f1_ci": [
|
1371 |
+
0.8410793781503274,
|
1372 |
+
0.8410793781503274
|
1373 |
+
],
|
1374 |
+
"specificity_ci": [
|
1375 |
+
0.755950752393989,
|
1376 |
+
0.755950752393989
|
1377 |
+
],
|
1378 |
+
"npv_ci": [
|
1379 |
+
0.8861326740097348,
|
1380 |
+
0.8861326740097348
|
1381 |
+
],
|
1382 |
+
"class_weights": {
|
1383 |
+
"0.0": 0.7056087551299589,
|
1384 |
+
"1.0": 1.7159015302727878
|
1385 |
+
}
|
1386 |
+
},
|
1387 |
+
"identity_hate": {
|
1388 |
+
"auc": 0.9136671508934288,
|
1389 |
+
"threshold": 0.031982019543647766,
|
1390 |
+
"precision": 0.8173388685191341,
|
1391 |
+
"recall": 0.8868613138686137,
|
1392 |
+
"f1": 0.8506820152960648,
|
1393 |
+
"specificity": 0.801801801801802,
|
1394 |
+
"npv": 0.8763431199913764,
|
1395 |
+
"positive_samples": 274,
|
1396 |
+
"true_positives": 2287,
|
1397 |
+
"false_positives": 511,
|
1398 |
+
"true_negatives": 2067,
|
1399 |
+
"false_negatives": 291,
|
1400 |
+
"auc_ci": [
|
1401 |
+
0.9136671508934288,
|
1402 |
+
0.9136671508934288
|
1403 |
+
],
|
1404 |
+
"precision_ci": [
|
1405 |
+
0.8173388685191341,
|
1406 |
+
0.8173388685191341
|
1407 |
+
],
|
1408 |
+
"recall_ci": [
|
1409 |
+
0.8868613138686137,
|
1410 |
+
0.8868613138686137
|
1411 |
+
],
|
1412 |
+
"f1_ci": [
|
1413 |
+
0.8506820152960648,
|
1414 |
+
0.8506820152960648
|
1415 |
+
],
|
1416 |
+
"specificity_ci": [
|
1417 |
+
0.801801801801802,
|
1418 |
+
0.801801801801802
|
1419 |
+
],
|
1420 |
+
"npv_ci": [
|
1421 |
+
0.8763431199913764,
|
1422 |
+
0.8763431199913764
|
1423 |
+
],
|
1424 |
+
"class_weights": {
|
1425 |
+
"0.0": 0.528050778050778,
|
1426 |
+
"1.0": 9.412408759124087
|
1427 |
+
}
|
1428 |
+
}
|
1429 |
+
},
|
1430 |
+
"sample_count": 5158
|
1431 |
+
},
|
1432 |
+
"5": {
|
1433 |
+
"auc": 0.9460152147041221,
|
1434 |
+
"precision": 0.7347347983801011,
|
1435 |
+
"recall": 0.8867510548523206,
|
1436 |
+
"f1": 0.7840490209789418,
|
1437 |
+
"hamming_loss": 0.13677289804378806,
|
1438 |
+
"exact_match": 0.5347842984842596,
|
1439 |
+
"specificity": 0.8623489178772902,
|
1440 |
+
"class_metrics": {
|
1441 |
+
"toxic": {
|
1442 |
+
"auc": 0.9757415342563065,
|
1443 |
+
"threshold": 0.5313886404037476,
|
1444 |
+
"precision": 0.9310023292772915,
|
1445 |
+
"recall": 0.9121306376360682,
|
1446 |
+
"f1": 0.9214698705828952,
|
1447 |
+
"specificity": 0.9324009324009348,
|
1448 |
+
"npv": 0.9138763886248709,
|
1449 |
+
"positive_samples": 2572,
|
1450 |
+
"true_positives": 2346,
|
1451 |
+
"false_positives": 173,
|
1452 |
+
"true_negatives": 2399,
|
1453 |
+
"false_negatives": 226,
|
1454 |
+
"auc_ci": [
|
1455 |
+
0.9757415342563065,
|
1456 |
+
0.9757415342563065
|
1457 |
+
],
|
1458 |
+
"precision_ci": [
|
1459 |
+
0.9310023292772915,
|
1460 |
+
0.9310023292772915
|
1461 |
+
],
|
1462 |
+
"recall_ci": [
|
1463 |
+
0.9121306376360682,
|
1464 |
+
0.9121306376360682
|
1465 |
+
],
|
1466 |
+
"f1_ci": [
|
1467 |
+
0.9214698705828952,
|
1468 |
+
0.9214698705828952
|
1469 |
+
],
|
1470 |
+
"specificity_ci": [
|
1471 |
+
0.9324009324009348,
|
1472 |
+
0.9324009324009348
|
1473 |
+
],
|
1474 |
+
"npv_ci": [
|
1475 |
+
0.9138763886248709,
|
1476 |
+
0.9138763886248709
|
1477 |
+
],
|
1478 |
+
"class_weights": {
|
1479 |
+
"0.0": 0.9996114996114996,
|
1480 |
+
"1.0": 1.0003888024883358
|
1481 |
+
}
|
1482 |
+
},
|
1483 |
+
"severe_toxic": {
|
1484 |
+
"auc": 0.9032281899714669,
|
1485 |
+
"threshold": 0.05001964047551155,
|
1486 |
+
"precision": 0.8240547826417868,
|
1487 |
+
"recall": 0.8458333333333334,
|
1488 |
+
"f1": 0.8348020409069885,
|
1489 |
+
"specificity": 0.8194048104362093,
|
1490 |
+
"npv": 0.8416483326674401,
|
1491 |
+
"positive_samples": 240,
|
1492 |
+
"true_positives": 2176,
|
1493 |
+
"false_positives": 464,
|
1494 |
+
"true_negatives": 2108,
|
1495 |
+
"false_negatives": 396,
|
1496 |
+
"auc_ci": [
|
1497 |
+
0.9032281899714669,
|
1498 |
+
0.9032281899714669
|
1499 |
+
],
|
1500 |
+
"precision_ci": [
|
1501 |
+
0.8240547826417868,
|
1502 |
+
0.8240547826417868
|
1503 |
+
],
|
1504 |
+
"recall_ci": [
|
1505 |
+
0.8458333333333334,
|
1506 |
+
0.8458333333333334
|
1507 |
+
],
|
1508 |
+
"f1_ci": [
|
1509 |
+
0.8348020409069885,
|
1510 |
+
0.8348020409069885
|
1511 |
+
],
|
1512 |
+
"specificity_ci": [
|
1513 |
+
0.8194048104362093,
|
1514 |
+
0.8194048104362093
|
1515 |
+
],
|
1516 |
+
"npv_ci": [
|
1517 |
+
0.8416483326674401,
|
1518 |
+
0.8416483326674401
|
1519 |
+
],
|
1520 |
+
"class_weights": {
|
1521 |
+
"0.0": 0.5244598450876478,
|
1522 |
+
"1.0": 10.720833333333333
|
1523 |
+
}
|
1524 |
+
},
|
1525 |
+
"obscene": {
|
1526 |
+
"auc": 0.9399297347094935,
|
1527 |
+
"threshold": 0.20134443044662476,
|
1528 |
+
"precision": 0.8638120606436712,
|
1529 |
+
"recall": 0.8799999999999917,
|
1530 |
+
"f1": 0.8718308933886383,
|
1531 |
+
"specificity": 0.8612598826829971,
|
1532 |
+
"npv": 0.8777082380338568,
|
1533 |
+
"positive_samples": 1225,
|
1534 |
+
"true_positives": 2264,
|
1535 |
+
"false_positives": 356,
|
1536 |
+
"true_negatives": 2216,
|
1537 |
+
"false_negatives": 308,
|
1538 |
+
"auc_ci": [
|
1539 |
+
0.9399297347094935,
|
1540 |
+
0.9399297347094935
|
1541 |
+
],
|
1542 |
+
"precision_ci": [
|
1543 |
+
0.8638120606436712,
|
1544 |
+
0.8638120606436712
|
1545 |
+
],
|
1546 |
+
"recall_ci": [
|
1547 |
+
0.8799999999999917,
|
1548 |
+
0.8799999999999917
|
1549 |
+
],
|
1550 |
+
"f1_ci": [
|
1551 |
+
0.8718308933886383,
|
1552 |
+
0.8718308933886383
|
1553 |
+
],
|
1554 |
+
"specificity_ci": [
|
1555 |
+
0.8612598826829971,
|
1556 |
+
0.8612598826829971
|
1557 |
+
],
|
1558 |
+
"npv_ci": [
|
1559 |
+
0.8777082380338568,
|
1560 |
+
0.8777082380338568
|
1561 |
+
],
|
1562 |
+
"class_weights": {
|
1563 |
+
"0.0": 0.6562101504718184,
|
1564 |
+
"1.0": 2.100408163265306
|
1565 |
+
}
|
1566 |
+
},
|
1567 |
+
"threat": {
|
1568 |
+
"auc": 0.8786647405643102,
|
1569 |
+
"threshold": 0.018557138741016388,
|
1570 |
+
"precision": 0.8659949024954022,
|
1571 |
+
"recall": 0.8055555555555568,
|
1572 |
+
"f1": 0.834682556458845,
|
1573 |
+
"specificity": 0.8753473600635171,
|
1574 |
+
"npv": 0.8182408543184921,
|
1575 |
+
"positive_samples": 108,
|
1576 |
+
"true_positives": 2072,
|
1577 |
+
"false_positives": 320,
|
1578 |
+
"true_negatives": 2252,
|
1579 |
+
"false_negatives": 500,
|
1580 |
+
"auc_ci": [
|
1581 |
+
0.8786647405643102,
|
1582 |
+
0.8786647405643102
|
1583 |
+
],
|
1584 |
+
"precision_ci": [
|
1585 |
+
0.8659949024954022,
|
1586 |
+
0.8659949024954022
|
1587 |
+
],
|
1588 |
+
"recall_ci": [
|
1589 |
+
0.8055555555555568,
|
1590 |
+
0.8055555555555568
|
1591 |
+
],
|
1592 |
+
"f1_ci": [
|
1593 |
+
0.834682556458845,
|
1594 |
+
0.834682556458845
|
1595 |
+
],
|
1596 |
+
"specificity_ci": [
|
1597 |
+
0.8753473600635171,
|
1598 |
+
0.8753473600635171
|
1599 |
+
],
|
1600 |
+
"npv_ci": [
|
1601 |
+
0.8182408543184921,
|
1602 |
+
0.8182408543184921
|
1603 |
+
],
|
1604 |
+
"class_weights": {
|
1605 |
+
"0.0": 0.5107185391028186,
|
1606 |
+
"1.0": 23.824074074074073
|
1607 |
+
}
|
1608 |
+
},
|
1609 |
+
"insult": {
|
1610 |
+
"auc": 0.9170891169219639,
|
1611 |
+
"threshold": 0.32249945402145386,
|
1612 |
+
"precision": 0.8355108316117581,
|
1613 |
+
"recall": 0.8716755319149065,
|
1614 |
+
"f1": 0.8532101288125946,
|
1615 |
+
"specificity": 0.8283909939593549,
|
1616 |
+
"npv": 0.8658697667424693,
|
1617 |
+
"positive_samples": 1504,
|
1618 |
+
"true_positives": 2242,
|
1619 |
+
"false_positives": 441,
|
1620 |
+
"true_negatives": 2131,
|
1621 |
+
"false_negatives": 330,
|
1622 |
+
"auc_ci": [
|
1623 |
+
0.9170891169219639,
|
1624 |
+
0.9170891169219639
|
1625 |
+
],
|
1626 |
+
"precision_ci": [
|
1627 |
+
0.8355108316117581,
|
1628 |
+
0.8355108316117581
|
1629 |
+
],
|
1630 |
+
"recall_ci": [
|
1631 |
+
0.8716755319149065,
|
1632 |
+
0.8716755319149065
|
1633 |
+
],
|
1634 |
+
"f1_ci": [
|
1635 |
+
0.8532101288125946,
|
1636 |
+
0.8532101288125946
|
1637 |
+
],
|
1638 |
+
"specificity_ci": [
|
1639 |
+
0.8283909939593549,
|
1640 |
+
0.8283909939593549
|
1641 |
+
],
|
1642 |
+
"npv_ci": [
|
1643 |
+
0.8658697667424693,
|
1644 |
+
0.8658697667424693
|
1645 |
+
],
|
1646 |
+
"class_weights": {
|
1647 |
+
"0.0": 0.7064799560680944,
|
1648 |
+
"1.0": 1.7107712765957446
|
1649 |
+
}
|
1650 |
+
},
|
1651 |
+
"identity_hate": {
|
1652 |
+
"auc": 0.9171971252566641,
|
1653 |
+
"threshold": 0.055891502648591995,
|
1654 |
+
"precision": 0.8532420335871026,
|
1655 |
+
"recall": 0.829710144927536,
|
1656 |
+
"f1": 0.8413115718720496,
|
1657 |
+
"specificity": 0.8572895277207252,
|
1658 |
+
"npv": 0.8342805841339561,
|
1659 |
+
"positive_samples": 276,
|
1660 |
+
"true_positives": 2134,
|
1661 |
+
"false_positives": 367,
|
1662 |
+
"true_negatives": 2205,
|
1663 |
+
"false_negatives": 438,
|
1664 |
+
"auc_ci": [
|
1665 |
+
0.9171971252566641,
|
1666 |
+
0.9171971252566641
|
1667 |
+
],
|
1668 |
+
"precision_ci": [
|
1669 |
+
0.8532420335871026,
|
1670 |
+
0.8532420335871026
|
1671 |
+
],
|
1672 |
+
"recall_ci": [
|
1673 |
+
0.829710144927536,
|
1674 |
+
0.829710144927536
|
1675 |
+
],
|
1676 |
+
"f1_ci": [
|
1677 |
+
0.8413115718720496,
|
1678 |
+
0.8413115718720496
|
1679 |
+
],
|
1680 |
+
"specificity_ci": [
|
1681 |
+
0.8572895277207252,
|
1682 |
+
0.8572895277207252
|
1683 |
+
],
|
1684 |
+
"npv_ci": [
|
1685 |
+
0.8342805841339561,
|
1686 |
+
0.8342805841339561
|
1687 |
+
],
|
1688 |
+
"class_weights": {
|
1689 |
+
"0.0": 0.5283367556468173,
|
1690 |
+
"1.0": 9.322463768115941
|
1691 |
+
}
|
1692 |
+
}
|
1693 |
+
},
|
1694 |
+
"sample_count": 5146
|
1695 |
+
},
|
1696 |
+
"6": {
|
1697 |
+
"auc": 0.9462815482574403,
|
1698 |
+
"precision": 0.7134961462135606,
|
1699 |
+
"recall": 0.9073793914943687,
|
1700 |
+
"f1": 0.7744642816056855,
|
1701 |
+
"hamming_loss": 0.15539933230611197,
|
1702 |
+
"exact_match": 0.5132896764252697,
|
1703 |
+
"specificity": 0.8360743701752594,
|
1704 |
+
"class_metrics": {
|
1705 |
+
"toxic": {
|
1706 |
+
"auc": 0.9780732995232411,
|
1707 |
+
"threshold": 0.5710838437080383,
|
1708 |
+
"precision": 0.9379357119021944,
|
1709 |
+
"recall": 0.9243012422360248,
|
1710 |
+
"f1": 0.9310685643115885,
|
1711 |
+
"specificity": 0.9388379204893005,
|
1712 |
+
"npv": 0.9253858836387251,
|
1713 |
+
"positive_samples": 2576,
|
1714 |
+
"true_positives": 2399,
|
1715 |
+
"false_positives": 158,
|
1716 |
+
"true_negatives": 2437,
|
1717 |
+
"false_negatives": 196,
|
1718 |
+
"auc_ci": [
|
1719 |
+
0.9780732995232411,
|
1720 |
+
0.9780732995232411
|
1721 |
+
],
|
1722 |
+
"precision_ci": [
|
1723 |
+
0.9379357119021944,
|
1724 |
+
0.9379357119021944
|
1725 |
+
],
|
1726 |
+
"recall_ci": [
|
1727 |
+
0.9243012422360248,
|
1728 |
+
0.9243012422360248
|
1729 |
+
],
|
1730 |
+
"f1_ci": [
|
1731 |
+
0.9310685643115885,
|
1732 |
+
0.9310685643115885
|
1733 |
+
],
|
1734 |
+
"specificity_ci": [
|
1735 |
+
0.9388379204893005,
|
1736 |
+
0.9388379204893005
|
1737 |
+
],
|
1738 |
+
"npv_ci": [
|
1739 |
+
0.9253858836387251,
|
1740 |
+
0.9253858836387251
|
1741 |
+
],
|
1742 |
+
"class_weights": {
|
1743 |
+
"0.0": 0.9923547400611621,
|
1744 |
+
"1.0": 1.0077639751552796
|
1745 |
+
}
|
1746 |
+
},
|
1747 |
+
"severe_toxic": {
|
1748 |
+
"auc": 0.9067576592369966,
|
1749 |
+
"threshold": 0.023807251825928688,
|
1750 |
+
"precision": 0.7794259030353159,
|
1751 |
+
"recall": 0.9380165289256208,
|
1752 |
+
"f1": 0.8513989948241057,
|
1753 |
+
"specificity": 0.7345454545454645,
|
1754 |
+
"npv": 0.9221830255239729,
|
1755 |
+
"positive_samples": 242,
|
1756 |
+
"true_positives": 2435,
|
1757 |
+
"false_positives": 689,
|
1758 |
+
"true_negatives": 1906,
|
1759 |
+
"false_negatives": 160,
|
1760 |
+
"auc_ci": [
|
1761 |
+
0.9067576592369966,
|
1762 |
+
0.9067576592369966
|
1763 |
+
],
|
1764 |
+
"precision_ci": [
|
1765 |
+
0.7794259030353159,
|
1766 |
+
0.7794259030353159
|
1767 |
+
],
|
1768 |
+
"recall_ci": [
|
1769 |
+
0.9380165289256208,
|
1770 |
+
0.9380165289256208
|
1771 |
+
],
|
1772 |
+
"f1_ci": [
|
1773 |
+
0.8513989948241057,
|
1774 |
+
0.8513989948241057
|
1775 |
+
],
|
1776 |
+
"specificity_ci": [
|
1777 |
+
0.7345454545454645,
|
1778 |
+
0.7345454545454645
|
1779 |
+
],
|
1780 |
+
"npv_ci": [
|
1781 |
+
0.9221830255239729,
|
1782 |
+
0.9221830255239729
|
1783 |
+
],
|
1784 |
+
"class_weights": {
|
1785 |
+
"0.0": 0.5244444444444445,
|
1786 |
+
"1.0": 10.727272727272727
|
1787 |
+
}
|
1788 |
+
},
|
1789 |
+
"obscene": {
|
1790 |
+
"auc": 0.9375048626461102,
|
1791 |
+
"threshold": 0.14760328829288483,
|
1792 |
+
"precision": 0.8287449241470627,
|
1793 |
+
"recall": 0.9084278768233371,
|
1794 |
+
"f1": 0.8667588986547364,
|
1795 |
+
"specificity": 0.8122789287518954,
|
1796 |
+
"npv": 0.8986867106241987,
|
1797 |
+
"positive_samples": 1234,
|
1798 |
+
"true_positives": 2358,
|
1799 |
+
"false_positives": 487,
|
1800 |
+
"true_negatives": 2108,
|
1801 |
+
"false_negatives": 237,
|
1802 |
+
"auc_ci": [
|
1803 |
+
0.9375048626461102,
|
1804 |
+
0.9375048626461102
|
1805 |
+
],
|
1806 |
+
"precision_ci": [
|
1807 |
+
0.8287449241470627,
|
1808 |
+
0.8287449241470627
|
1809 |
+
],
|
1810 |
+
"recall_ci": [
|
1811 |
+
0.9084278768233371,
|
1812 |
+
0.9084278768233371
|
1813 |
+
],
|
1814 |
+
"f1_ci": [
|
1815 |
+
0.8667588986547364,
|
1816 |
+
0.8667588986547364
|
1817 |
+
],
|
1818 |
+
"specificity_ci": [
|
1819 |
+
0.8122789287518954,
|
1820 |
+
0.8122789287518954
|
1821 |
+
],
|
1822 |
+
"npv_ci": [
|
1823 |
+
0.8986867106241987,
|
1824 |
+
0.8986867106241987
|
1825 |
+
],
|
1826 |
+
"class_weights": {
|
1827 |
+
"0.0": 0.6558868115209702,
|
1828 |
+
"1.0": 2.1037277147487843
|
1829 |
+
}
|
1830 |
+
},
|
1831 |
+
"threat": {
|
1832 |
+
"auc": 0.9031869137455802,
|
1833 |
+
"threshold": 0.026773449033498764,
|
1834 |
+
"precision": 0.9112427696973145,
|
1835 |
+
"recall": 0.761467889908257,
|
1836 |
+
"f1": 0.8296498919893159,
|
1837 |
+
"specificity": 0.9258312020460328,
|
1838 |
+
"npv": 0.7951394486538688,
|
1839 |
+
"positive_samples": 109,
|
1840 |
+
"true_positives": 1976,
|
1841 |
+
"false_positives": 192,
|
1842 |
+
"true_negatives": 2403,
|
1843 |
+
"false_negatives": 619,
|
1844 |
+
"auc_ci": [
|
1845 |
+
0.9031869137455802,
|
1846 |
+
0.9031869137455802
|
1847 |
+
],
|
1848 |
+
"precision_ci": [
|
1849 |
+
0.9112427696973145,
|
1850 |
+
0.9112427696973145
|
1851 |
+
],
|
1852 |
+
"recall_ci": [
|
1853 |
+
0.761467889908257,
|
1854 |
+
0.761467889908257
|
1855 |
+
],
|
1856 |
+
"f1_ci": [
|
1857 |
+
0.8296498919893159,
|
1858 |
+
0.8296498919893159
|
1859 |
+
],
|
1860 |
+
"specificity_ci": [
|
1861 |
+
0.9258312020460328,
|
1862 |
+
0.9258312020460328
|
1863 |
+
],
|
1864 |
+
"npv_ci": [
|
1865 |
+
0.7951394486538688,
|
1866 |
+
0.7951394486538688
|
1867 |
+
],
|
1868 |
+
"class_weights": {
|
1869 |
+
"0.0": 0.5107220145583317,
|
1870 |
+
"1.0": 23.81651376146789
|
1871 |
+
}
|
1872 |
+
},
|
1873 |
+
"insult": {
|
1874 |
+
"auc": 0.9164838070297321,
|
1875 |
+
"threshold": 0.2600024938583374,
|
1876 |
+
"precision": 0.8178816065079044,
|
1877 |
+
"recall": 0.8940397350993466,
|
1878 |
+
"f1": 0.8542666500534941,
|
1879 |
+
"specificity": 0.8009234111895767,
|
1880 |
+
"npv": 0.8831600262588531,
|
1881 |
+
"positive_samples": 1510,
|
1882 |
+
"true_positives": 2320,
|
1883 |
+
"false_positives": 516,
|
1884 |
+
"true_negatives": 2079,
|
1885 |
+
"false_negatives": 275,
|
1886 |
+
"auc_ci": [
|
1887 |
+
0.9164838070297321,
|
1888 |
+
0.9164838070297321
|
1889 |
+
],
|
1890 |
+
"precision_ci": [
|
1891 |
+
0.8178816065079044,
|
1892 |
+
0.8178816065079044
|
1893 |
+
],
|
1894 |
+
"recall_ci": [
|
1895 |
+
0.8940397350993466,
|
1896 |
+
0.8940397350993466
|
1897 |
+
],
|
1898 |
+
"f1_ci": [
|
1899 |
+
0.8542666500534941,
|
1900 |
+
0.8542666500534941
|
1901 |
+
],
|
1902 |
+
"specificity_ci": [
|
1903 |
+
0.8009234111895767,
|
1904 |
+
0.8009234111895767
|
1905 |
+
],
|
1906 |
+
"npv_ci": [
|
1907 |
+
0.8831600262588531,
|
1908 |
+
0.8831600262588531
|
1909 |
+
],
|
1910 |
+
"class_weights": {
|
1911 |
+
"0.0": 0.7050516023900054,
|
1912 |
+
"1.0": 1.719205298013245
|
1913 |
+
}
|
1914 |
+
},
|
1915 |
+
"identity_hate": {
|
1916 |
+
"auc": 0.9038051609994096,
|
1917 |
+
"threshold": 0.03315547853708267,
|
1918 |
+
"precision": 0.8124487711378064,
|
1919 |
+
"recall": 0.8489208633093526,
|
1920 |
+
"f1": 0.8302844808144539,
|
1921 |
+
"specificity": 0.804029304029316,
|
1922 |
+
"npv": 0.8418199125360486,
|
1923 |
+
"positive_samples": 278,
|
1924 |
+
"true_positives": 2203,
|
1925 |
+
"false_positives": 508,
|
1926 |
+
"true_negatives": 2087,
|
1927 |
+
"false_negatives": 392,
|
1928 |
+
"auc_ci": [
|
1929 |
+
0.9038051609994096,
|
1930 |
+
0.9038051609994096
|
1931 |
+
],
|
1932 |
+
"precision_ci": [
|
1933 |
+
0.8124487711378064,
|
1934 |
+
0.8124487711378064
|
1935 |
+
],
|
1936 |
+
"recall_ci": [
|
1937 |
+
0.8489208633093526,
|
1938 |
+
0.8489208633093526
|
1939 |
+
],
|
1940 |
+
"f1_ci": [
|
1941 |
+
0.8302844808144539,
|
1942 |
+
0.8302844808144539
|
1943 |
+
],
|
1944 |
+
"specificity_ci": [
|
1945 |
+
0.804029304029316,
|
1946 |
+
0.804029304029316
|
1947 |
+
],
|
1948 |
+
"npv_ci": [
|
1949 |
+
0.8418199125360486,
|
1950 |
+
0.8418199125360486
|
1951 |
+
],
|
1952 |
+
"class_weights": {
|
1953 |
+
"0.0": 0.5282865282865283,
|
1954 |
+
"1.0": 9.338129496402878
|
1955 |
+
}
|
1956 |
+
}
|
1957 |
+
},
|
1958 |
+
"sample_count": 5192
|
1959 |
+
}
|
1960 |
+
},
|
1961 |
+
"per_class": {},
|
1962 |
+
"thresholds": {
|
1963 |
+
"0": {
|
1964 |
+
"toxic": 0.46047261357307434,
|
1965 |
+
"severe_toxic": 0.03537772223353386,
|
1966 |
+
"obscene": 0.2777131497859955,
|
1967 |
+
"threat": 0.016539234668016434,
|
1968 |
+
"insult": 0.25907590985298157,
|
1969 |
+
"identity_hate": 0.026042653247714043
|
1970 |
+
},
|
1971 |
+
"1": {
|
1972 |
+
"toxic": 0.44148319959640503,
|
1973 |
+
"severe_toxic": 0.03648429363965988,
|
1974 |
+
"obscene": 0.1990610957145691,
|
1975 |
+
"threat": 0.012619060464203358,
|
1976 |
+
"insult": 0.24214455485343933,
|
1977 |
+
"identity_hate": 0.03167847916483879
|
1978 |
+
},
|
1979 |
+
"2": {
|
1980 |
+
"toxic": 0.3978160321712494,
|
1981 |
+
"severe_toxic": 0.015000982210040092,
|
1982 |
+
"obscene": 0.11362762749195099,
|
1983 |
+
"threat": 0.008195769973099232,
|
1984 |
+
"insult": 0.1587354838848114,
|
1985 |
+
"identity_hate": 0.0467526838183403
|
1986 |
+
},
|
1987 |
+
"3": {
|
1988 |
+
"toxic": 0.5033379793167114,
|
1989 |
+
"severe_toxic": 0.021415209397673607,
|
1990 |
+
"obscene": 0.14896434545516968,
|
1991 |
+
"threat": 0.013273251242935658,
|
1992 |
+
"insult": 0.22368550300598145,
|
1993 |
+
"identity_hate": 0.042373284697532654
|
1994 |
+
},
|
1995 |
+
"4": {
|
1996 |
+
"toxic": 0.4544762372970581,
|
1997 |
+
"severe_toxic": 0.0307308342307806,
|
1998 |
+
"obscene": 0.1775909662246704,
|
1999 |
+
"threat": 0.014509523287415504,
|
2000 |
+
"insult": 0.19917058944702148,
|
2001 |
+
"identity_hate": 0.031982019543647766
|
2002 |
+
},
|
2003 |
+
"5": {
|
2004 |
+
"toxic": 0.5313886404037476,
|
2005 |
+
"severe_toxic": 0.05001964047551155,
|
2006 |
+
"obscene": 0.20134443044662476,
|
2007 |
+
"threat": 0.018557138741016388,
|
2008 |
+
"insult": 0.32249945402145386,
|
2009 |
+
"identity_hate": 0.055891502648591995
|
2010 |
+
},
|
2011 |
+
"6": {
|
2012 |
+
"toxic": 0.5710838437080383,
|
2013 |
+
"severe_toxic": 0.023807251825928688,
|
2014 |
+
"obscene": 0.14760328829288483,
|
2015 |
+
"threat": 0.026773449033498764,
|
2016 |
+
"insult": 0.2600024938583374,
|
2017 |
+
"identity_hate": 0.03315547853708267
|
2018 |
+
}
|
2019 |
+
}
|
2020 |
+
}
|
evaluation_results/eval_20250208_161149/plots/calibration_0.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_1.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_2.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_3.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_4.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_5.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_6.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/class_calibration.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/language_performance.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/plots/metric_correlations.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/plots/overall_calibration.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/plots/performance_distributions.png
ADDED
![]() |
evaluation_results/eval_20250208_161149/predictions.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d562e6c02fc268d01464f9716846556a75e863ec9cc03d582f39e14191cbd496
|
3 |
+
size 809713
|
evaluation_results/eval_20250208_161149/thresholds.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"0": {
|
3 |
+
"toxic": 0.46047261357307434,
|
4 |
+
"severe_toxic": 0.03537772223353386,
|
5 |
+
"obscene": 0.2777131497859955,
|
6 |
+
"threat": 0.016539234668016434,
|
7 |
+
"insult": 0.25907590985298157,
|
8 |
+
"identity_hate": 0.026042653247714043
|
9 |
+
},
|
10 |
+
"1": {
|
11 |
+
"toxic": 0.44148319959640503,
|
12 |
+
"severe_toxic": 0.03648429363965988,
|
13 |
+
"obscene": 0.1990610957145691,
|
14 |
+
"threat": 0.012619060464203358,
|
15 |
+
"insult": 0.24214455485343933,
|
16 |
+
"identity_hate": 0.03167847916483879
|
17 |
+
},
|
18 |
+
"2": {
|
19 |
+
"toxic": 0.3978160321712494,
|
20 |
+
"severe_toxic": 0.015000982210040092,
|
21 |
+
"obscene": 0.11362762749195099,
|
22 |
+
"threat": 0.008195769973099232,
|
23 |
+
"insult": 0.1587354838848114,
|
24 |
+
"identity_hate": 0.0467526838183403
|
25 |
+
},
|
26 |
+
"3": {
|
27 |
+
"toxic": 0.5033379793167114,
|
28 |
+
"severe_toxic": 0.021415209397673607,
|
29 |
+
"obscene": 0.14896434545516968,
|
30 |
+
"threat": 0.013273251242935658,
|
31 |
+
"insult": 0.22368550300598145,
|
32 |
+
"identity_hate": 0.042373284697532654
|
33 |
+
},
|
34 |
+
"4": {
|
35 |
+
"toxic": 0.4544762372970581,
|
36 |
+
"severe_toxic": 0.0307308342307806,
|
37 |
+
"obscene": 0.1775909662246704,
|
38 |
+
"threat": 0.014509523287415504,
|
39 |
+
"insult": 0.19917058944702148,
|
40 |
+
"identity_hate": 0.031982019543647766
|
41 |
+
},
|
42 |
+
"5": {
|
43 |
+
"toxic": 0.5313886404037476,
|
44 |
+
"severe_toxic": 0.05001964047551155,
|
45 |
+
"obscene": 0.20134443044662476,
|
46 |
+
"threat": 0.018557138741016388,
|
47 |
+
"insult": 0.32249945402145386,
|
48 |
+
"identity_hate": 0.055891502648591995
|
49 |
+
},
|
50 |
+
"6": {
|
51 |
+
"toxic": 0.5710838437080383,
|
52 |
+
"severe_toxic": 0.023807251825928688,
|
53 |
+
"obscene": 0.14760328829288483,
|
54 |
+
"threat": 0.026773449033498764,
|
55 |
+
"insult": 0.2600024938583374,
|
56 |
+
"identity_hate": 0.03315547853708267
|
57 |
+
}
|
58 |
+
}
|
evaluation_results/eval_20250401_143401/eval_params.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"timestamp": "20250401_143401",
|
3 |
+
"model_path": "weights/toxic_classifier_xlm-roberta-large",
|
4 |
+
"checkpoint": null,
|
5 |
+
"test_file": "dataset/split/val.csv",
|
6 |
+
"batch_size": 64,
|
7 |
+
"num_workers": 16,
|
8 |
+
"cache_dir": "cached_data",
|
9 |
+
"force_retokenize": false,
|
10 |
+
"prefetch_factor": 2,
|
11 |
+
"max_length": 128,
|
12 |
+
"gc_frequency": 500,
|
13 |
+
"label_columns": [
|
14 |
+
"toxic",
|
15 |
+
"severe_toxic",
|
16 |
+
"obscene",
|
17 |
+
"threat",
|
18 |
+
"insult",
|
19 |
+
"identity_hate"
|
20 |
+
]
|
21 |
+
}
|
evaluation_results/eval_20250401_143401/evaluation_results.json
ADDED
@@ -0,0 +1,684 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"default_thresholds": {
|
3 |
+
"overall": {
|
4 |
+
"auc_macro": 0.9116120481007194,
|
5 |
+
"auc_weighted": 0.9305869103434485,
|
6 |
+
"precision_macro": 0.7017348731216243,
|
7 |
+
"precision_weighted": 0.7941268867549155,
|
8 |
+
"recall_macro": 0.4685972374699909,
|
9 |
+
"recall_weighted": 0.7276981501898812,
|
10 |
+
"f1_macro": 0.5228946160541719,
|
11 |
+
"f1_weighted": 0.7469638283202927,
|
12 |
+
"hamming_loss": 0.08497391889618038,
|
13 |
+
"exact_match": 0.6461383139828369
|
14 |
+
},
|
15 |
+
"per_language": {
|
16 |
+
"0": {
|
17 |
+
"auc_macro": 0.9445681226397739,
|
18 |
+
"auc_weighted": 0.9465404082666297,
|
19 |
+
"precision_macro": 0.7219326082283263,
|
20 |
+
"precision_weighted": 0.7908382685179838,
|
21 |
+
"recall_macro": 0.5535398284592582,
|
22 |
+
"recall_weighted": 0.7833787465940054,
|
23 |
+
"f1_macro": 0.6000668677340134,
|
24 |
+
"f1_weighted": 0.7786737821480415,
|
25 |
+
"hamming_loss": 0.07650567773465575,
|
26 |
+
"exact_match": 0.6601983613626563,
|
27 |
+
"sample_count": 4638
|
28 |
+
},
|
29 |
+
"1": {
|
30 |
+
"auc_macro": 0.9064189306891727,
|
31 |
+
"auc_weighted": 0.9274078123911156,
|
32 |
+
"precision_macro": 0.6864158919056594,
|
33 |
+
"precision_weighted": 0.7852581089086744,
|
34 |
+
"recall_macro": 0.44366116589032245,
|
35 |
+
"recall_weighted": 0.7238780977896851,
|
36 |
+
"f1_macro": 0.48488161881757197,
|
37 |
+
"f1_weighted": 0.737051270947713,
|
38 |
+
"hamming_loss": 0.08752166377816291,
|
39 |
+
"exact_match": 0.6402849990371654,
|
40 |
+
"sample_count": 5193
|
41 |
+
},
|
42 |
+
"2": {
|
43 |
+
"auc_macro": 0.8945135400492461,
|
44 |
+
"auc_weighted": 0.9120120071881025,
|
45 |
+
"precision_macro": 0.7178271955012184,
|
46 |
+
"precision_weighted": 0.7982113173628885,
|
47 |
+
"recall_macro": 0.4043111379749362,
|
48 |
+
"recall_weighted": 0.6535947712418301,
|
49 |
+
"f1_macro": 0.4738257066120983,
|
50 |
+
"f1_weighted": 0.7027905834489889,
|
51 |
+
"hamming_loss": 0.09504905757810483,
|
52 |
+
"exact_match": 0.6229666924864447,
|
53 |
+
"sample_count": 5164
|
54 |
+
},
|
55 |
+
"3": {
|
56 |
+
"auc_macro": 0.9135727964673032,
|
57 |
+
"auc_weighted": 0.9339502655719858,
|
58 |
+
"precision_macro": 0.7093511783545062,
|
59 |
+
"precision_weighted": 0.7989932896421867,
|
60 |
+
"recall_macro": 0.4814045378504133,
|
61 |
+
"recall_weighted": 0.7405478070912451,
|
62 |
+
"f1_macro": 0.5327086132158053,
|
63 |
+
"f1_weighted": 0.7545000455696493,
|
64 |
+
"hamming_loss": 0.08359133126934984,
|
65 |
+
"exact_match": 0.6480263157894737,
|
66 |
+
"sample_count": 5168
|
67 |
+
},
|
68 |
+
"4": {
|
69 |
+
"auc_macro": 0.9050160058685811,
|
70 |
+
"auc_weighted": 0.9286663336151794,
|
71 |
+
"precision_macro": 0.6819384343494851,
|
72 |
+
"precision_weighted": 0.7945304496145832,
|
73 |
+
"recall_macro": 0.4656370270227365,
|
74 |
+
"recall_weighted": 0.7256427604871448,
|
75 |
+
"f1_macro": 0.5189060171591118,
|
76 |
+
"f1_weighted": 0.7474398480273773,
|
77 |
+
"hamming_loss": 0.08477150798267727,
|
78 |
+
"exact_match": 0.6509598603839442,
|
79 |
+
"sample_count": 5157
|
80 |
+
},
|
81 |
+
"5": {
|
82 |
+
"auc_macro": 0.9115535221829411,
|
83 |
+
"auc_weighted": 0.9337271942250184,
|
84 |
+
"precision_macro": 0.6927437323462047,
|
85 |
+
"precision_weighted": 0.7984424245250574,
|
86 |
+
"recall_macro": 0.4695924180409275,
|
87 |
+
"recall_weighted": 0.739629005059022,
|
88 |
+
"f1_macro": 0.5191221600663896,
|
89 |
+
"f1_weighted": 0.7554966948679994,
|
90 |
+
"hamming_loss": 0.08252364295893251,
|
91 |
+
"exact_match": 0.6525456665371162,
|
92 |
+
"sample_count": 5146
|
93 |
+
},
|
94 |
+
"6": {
|
95 |
+
"auc_macro": 0.9045493247421005,
|
96 |
+
"auc_weighted": 0.9308415576648513,
|
97 |
+
"precision_macro": 0.6958021612757893,
|
98 |
+
"precision_weighted": 0.7925797967619269,
|
99 |
+
"recall_macro": 0.4680867128534896,
|
100 |
+
"recall_weighted": 0.735071488645921,
|
101 |
+
"f1_macro": 0.5184729138243417,
|
102 |
+
"f1_weighted": 0.7510735996739993,
|
103 |
+
"hamming_loss": 0.0839753466872111,
|
104 |
+
"exact_match": 0.6494607087827426,
|
105 |
+
"sample_count": 5192
|
106 |
+
}
|
107 |
+
},
|
108 |
+
"per_class": {
|
109 |
+
"toxic": {
|
110 |
+
"auc": 0.9619106577495796,
|
111 |
+
"threshold": 0.5,
|
112 |
+
"precision": 0.9067127628925382,
|
113 |
+
"recall": 0.8891902582358592,
|
114 |
+
"f1": 0.8978660276161132,
|
115 |
+
"support": 17697,
|
116 |
+
"brier": 0.09342169378057544,
|
117 |
+
"true_positives": 15736,
|
118 |
+
"false_positives": 1619,
|
119 |
+
"true_negatives": 16342,
|
120 |
+
"false_negatives": 1961
|
121 |
+
},
|
122 |
+
"severe_toxic": {
|
123 |
+
"auc": 0.9017555053121755,
|
124 |
+
"threshold": 0.5,
|
125 |
+
"precision": 0.5620915032679739,
|
126 |
+
"recall": 0.15589123867069488,
|
127 |
+
"f1": 0.24408703878902555,
|
128 |
+
"support": 1655,
|
129 |
+
"brier": 0.05564494143865772,
|
130 |
+
"true_positives": 258,
|
131 |
+
"false_positives": 201,
|
132 |
+
"true_negatives": 33802,
|
133 |
+
"false_negatives": 1397
|
134 |
+
},
|
135 |
+
"obscene": {
|
136 |
+
"auc": 0.9247491461802884,
|
137 |
+
"threshold": 0.5,
|
138 |
+
"precision": 0.7636434008515031,
|
139 |
+
"recall": 0.686181312311616,
|
140 |
+
"f1": 0.7228430115405752,
|
141 |
+
"support": 8626,
|
142 |
+
"brier": 0.1102165916686836,
|
143 |
+
"true_positives": 5919,
|
144 |
+
"false_positives": 1832,
|
145 |
+
"true_negatives": 25200,
|
146 |
+
"false_negatives": 2707
|
147 |
+
},
|
148 |
+
"threat": {
|
149 |
+
"auc": 0.8978719938708597,
|
150 |
+
"threshold": 0.5,
|
151 |
+
"precision": 0.6042553191489362,
|
152 |
+
"recall": 0.1868421052631579,
|
153 |
+
"f1": 0.28542713567839195,
|
154 |
+
"support": 760,
|
155 |
+
"brier": 0.03694216309848939,
|
156 |
+
"true_positives": 142,
|
157 |
+
"false_positives": 93,
|
158 |
+
"true_negatives": 34805,
|
159 |
+
"false_negatives": 618
|
160 |
+
},
|
161 |
+
"insult": {
|
162 |
+
"auc": 0.8962985964590791,
|
163 |
+
"threshold": 0.5,
|
164 |
+
"precision": 0.6981960484871623,
|
165 |
+
"recall": 0.7172271791352093,
|
166 |
+
"f1": 0.7075836718901142,
|
167 |
+
"support": 10199,
|
168 |
+
"brier": 0.1366709113756841,
|
169 |
+
"true_positives": 7315,
|
170 |
+
"false_positives": 3162,
|
171 |
+
"true_negatives": 22297,
|
172 |
+
"false_negatives": 2884
|
173 |
+
},
|
174 |
+
"identity_hate": {
|
175 |
+
"auc": 0.887086389032334,
|
176 |
+
"threshold": 0.5,
|
177 |
+
"precision": 0.6755102040816326,
|
178 |
+
"recall": 0.17625133120340788,
|
179 |
+
"f1": 0.2795608108108108,
|
180 |
+
"support": 1878,
|
181 |
+
"brier": 0.06076370760519854,
|
182 |
+
"true_positives": 331,
|
183 |
+
"false_positives": 159,
|
184 |
+
"true_negatives": 33621,
|
185 |
+
"false_negatives": 1547
|
186 |
+
}
|
187 |
+
}
|
188 |
+
},
|
189 |
+
"optimized_thresholds": {
|
190 |
+
"overall": {
|
191 |
+
"auc_macro": 0.9116120481007194,
|
192 |
+
"auc_weighted": 0.9305869103434485,
|
193 |
+
"precision_macro": 0.5775888380947196,
|
194 |
+
"precision_weighted": 0.7443465124836487,
|
195 |
+
"recall_macro": 0.639900823721825,
|
196 |
+
"recall_weighted": 0.798186941075585,
|
197 |
+
"f1_macro": 0.6040131510667749,
|
198 |
+
"f1_weighted": 0.7686775463209056,
|
199 |
+
"hamming_loss": 0.09459775272496121,
|
200 |
+
"exact_match": 0.6191317516405855
|
201 |
+
},
|
202 |
+
"per_language": {
|
203 |
+
"0": {
|
204 |
+
"auc_macro": 0.9445681226397739,
|
205 |
+
"auc_weighted": 0.9465404082666297,
|
206 |
+
"precision_macro": 0.5885969911405202,
|
207 |
+
"precision_weighted": 0.7416734521846035,
|
208 |
+
"recall_macro": 0.7381385425477333,
|
209 |
+
"recall_weighted": 0.8514986376021798,
|
210 |
+
"f1_macro": 0.6497623010487168,
|
211 |
+
"f1_weighted": 0.7903759805291908,
|
212 |
+
"hamming_loss": 0.08746586172200661,
|
213 |
+
"exact_match": 0.6282880551962052,
|
214 |
+
"sample_count": 4638
|
215 |
+
},
|
216 |
+
"1": {
|
217 |
+
"auc_macro": 0.9064189306891727,
|
218 |
+
"auc_weighted": 0.9274078123911156,
|
219 |
+
"precision_macro": 0.5769491938694048,
|
220 |
+
"precision_weighted": 0.7372462490399235,
|
221 |
+
"recall_macro": 0.6223651765807731,
|
222 |
+
"recall_weighted": 0.7957133288680509,
|
223 |
+
"f1_macro": 0.5940383621467368,
|
224 |
+
"f1_weighted": 0.7630519259035966,
|
225 |
+
"hamming_loss": 0.09734257654534952,
|
226 |
+
"exact_match": 0.6112073945696129,
|
227 |
+
"sample_count": 5193
|
228 |
+
},
|
229 |
+
"2": {
|
230 |
+
"auc_macro": 0.8945135400492461,
|
231 |
+
"auc_weighted": 0.9120120071881025,
|
232 |
+
"precision_macro": 0.5883546567568967,
|
233 |
+
"precision_weighted": 0.7471472711374241,
|
234 |
+
"recall_macro": 0.5741089328356292,
|
235 |
+
"recall_weighted": 0.7323613205966147,
|
236 |
+
"f1_macro": 0.579910490554519,
|
237 |
+
"f1_weighted": 0.7393192722268676,
|
238 |
+
"hamming_loss": 0.10030983733539892,
|
239 |
+
"exact_match": 0.6094113090627421,
|
240 |
+
"sample_count": 5164
|
241 |
+
},
|
242 |
+
"3": {
|
243 |
+
"auc_macro": 0.9135727964673032,
|
244 |
+
"auc_weighted": 0.9339502655719858,
|
245 |
+
"precision_macro": 0.5674300764951785,
|
246 |
+
"precision_weighted": 0.7452385794349706,
|
247 |
+
"recall_macro": 0.6585754182827804,
|
248 |
+
"recall_weighted": 0.8117963367501261,
|
249 |
+
"f1_macro": 0.6075512335059755,
|
250 |
+
"f1_weighted": 0.7751847838928642,
|
251 |
+
"hamming_loss": 0.09404024767801858,
|
252 |
+
"exact_match": 0.6234520123839009,
|
253 |
+
"sample_count": 5168
|
254 |
+
},
|
255 |
+
"4": {
|
256 |
+
"auc_macro": 0.9050160058685811,
|
257 |
+
"auc_weighted": 0.9286663336151794,
|
258 |
+
"precision_macro": 0.5635774868138544,
|
259 |
+
"precision_weighted": 0.7453012013072762,
|
260 |
+
"recall_macro": 0.6307198572670079,
|
261 |
+
"recall_weighted": 0.793640054127199,
|
262 |
+
"f1_macro": 0.5906173214394316,
|
263 |
+
"f1_weighted": 0.7663604150980545,
|
264 |
+
"hamming_loss": 0.0963415422403206,
|
265 |
+
"exact_match": 0.6162497576110142,
|
266 |
+
"sample_count": 5157
|
267 |
+
},
|
268 |
+
"5": {
|
269 |
+
"auc_macro": 0.9115535221829411,
|
270 |
+
"auc_weighted": 0.9337271942250184,
|
271 |
+
"precision_macro": 0.577007586897046,
|
272 |
+
"precision_weighted": 0.7468873881119108,
|
273 |
+
"recall_macro": 0.635638229939968,
|
274 |
+
"recall_weighted": 0.8080944350758853,
|
275 |
+
"f1_macro": 0.5988862551226474,
|
276 |
+
"f1_weighted": 0.7742215916662522,
|
277 |
+
"hamming_loss": 0.09350304443580774,
|
278 |
+
"exact_match": 0.6195102992615624,
|
279 |
+
"sample_count": 5146
|
280 |
+
},
|
281 |
+
"6": {
|
282 |
+
"auc_macro": 0.9045493247421005,
|
283 |
+
"auc_weighted": 0.9308415576648513,
|
284 |
+
"precision_macro": 0.591572349044604,
|
285 |
+
"precision_weighted": 0.749047954356656,
|
286 |
+
"recall_macro": 0.6294384348455582,
|
287 |
+
"recall_weighted": 0.8016820857863751,
|
288 |
+
"f1_macro": 0.6039252504591597,
|
289 |
+
"f1_weighted": 0.772582192067038,
|
290 |
+
"hamming_loss": 0.09244992295839753,
|
291 |
+
"exact_match": 0.6267334360554699,
|
292 |
+
"sample_count": 5192
|
293 |
+
}
|
294 |
+
},
|
295 |
+
"per_class": {
|
296 |
+
"toxic": {
|
297 |
+
"auc": 0.9619106577495796,
|
298 |
+
"threshold": 0.4877551020408163,
|
299 |
+
"precision": 0.8999716472923164,
|
300 |
+
"recall": 0.8968186698310449,
|
301 |
+
"f1": 0.8983923921657421,
|
302 |
+
"support": 17697,
|
303 |
+
"brier": 0.09342169378057544,
|
304 |
+
"true_positives": 15871,
|
305 |
+
"false_positives": 1764,
|
306 |
+
"true_negatives": 16197,
|
307 |
+
"false_negatives": 1826
|
308 |
+
},
|
309 |
+
"severe_toxic": {
|
310 |
+
"auc": 0.9017555053121755,
|
311 |
+
"threshold": 0.373469387755102,
|
312 |
+
"precision": 0.34626149540183926,
|
313 |
+
"recall": 0.5232628398791541,
|
314 |
+
"f1": 0.4167468719923003,
|
315 |
+
"support": 1655,
|
316 |
+
"brier": 0.05564494143865772,
|
317 |
+
"true_positives": 866,
|
318 |
+
"false_positives": 1635,
|
319 |
+
"true_negatives": 32368,
|
320 |
+
"false_negatives": 789
|
321 |
+
},
|
322 |
+
"obscene": {
|
323 |
+
"auc": 0.9247491461802884,
|
324 |
+
"threshold": 0.4551020408163265,
|
325 |
+
"precision": 0.7017099430018999,
|
326 |
+
"recall": 0.770693252956179,
|
327 |
+
"f1": 0.734585635359116,
|
328 |
+
"support": 8626,
|
329 |
+
"brier": 0.1102165916686836,
|
330 |
+
"true_positives": 6648,
|
331 |
+
"false_positives": 2826,
|
332 |
+
"true_negatives": 24206,
|
333 |
+
"false_negatives": 1978
|
334 |
+
},
|
335 |
+
"threat": {
|
336 |
+
"auc": 0.8978719938708597,
|
337 |
+
"threshold": 0.38979591836734695,
|
338 |
+
"precision": 0.43684992570579495,
|
339 |
+
"recall": 0.3868421052631579,
|
340 |
+
"f1": 0.41032798325191905,
|
341 |
+
"support": 760,
|
342 |
+
"brier": 0.03694216309848939,
|
343 |
+
"true_positives": 294,
|
344 |
+
"false_positives": 379,
|
345 |
+
"true_negatives": 34519,
|
346 |
+
"false_negatives": 466
|
347 |
+
},
|
348 |
+
"insult": {
|
349 |
+
"auc": 0.8962985964590791,
|
350 |
+
"threshold": 0.463265306122449,
|
351 |
+
"precision": 0.6568989575638184,
|
352 |
+
"recall": 0.7846847730169625,
|
353 |
+
"f1": 0.7151282280403896,
|
354 |
+
"support": 10199,
|
355 |
+
"brier": 0.1366709113756841,
|
356 |
+
"true_positives": 8003,
|
357 |
+
"false_positives": 4180,
|
358 |
+
"true_negatives": 21279,
|
359 |
+
"false_negatives": 2196
|
360 |
+
},
|
361 |
+
"identity_hate": {
|
362 |
+
"auc": 0.887086389032334,
|
363 |
+
"threshold": 0.373469387755102,
|
364 |
+
"precision": 0.423841059602649,
|
365 |
+
"recall": 0.47710330138445156,
|
366 |
+
"f1": 0.44889779559118237,
|
367 |
+
"support": 1878,
|
368 |
+
"brier": 0.06076370760519854,
|
369 |
+
"true_positives": 896,
|
370 |
+
"false_positives": 1218,
|
371 |
+
"true_negatives": 32562,
|
372 |
+
"false_negatives": 982
|
373 |
+
}
|
374 |
+
}
|
375 |
+
},
|
376 |
+
"thresholds": {
|
377 |
+
"global": {
|
378 |
+
"toxic": {
|
379 |
+
"threshold": 0.4877551020408163,
|
380 |
+
"f1_score": 0.8926184748925591,
|
381 |
+
"support": 17697,
|
382 |
+
"total_samples": 35658
|
383 |
+
},
|
384 |
+
"severe_toxic": {
|
385 |
+
"threshold": 0.373469387755102,
|
386 |
+
"f1_score": 0.41132469871513055,
|
387 |
+
"support": 1655,
|
388 |
+
"total_samples": 35658
|
389 |
+
},
|
390 |
+
"obscene": {
|
391 |
+
"threshold": 0.4551020408163265,
|
392 |
+
"f1_score": 0.726924984126118,
|
393 |
+
"support": 8626,
|
394 |
+
"total_samples": 35658
|
395 |
+
},
|
396 |
+
"threat": {
|
397 |
+
"threshold": 0.38979591836734695,
|
398 |
+
"f1_score": 0.41018044345470683,
|
399 |
+
"support": 760,
|
400 |
+
"total_samples": 35658
|
401 |
+
},
|
402 |
+
"insult": {
|
403 |
+
"threshold": 0.463265306122449,
|
404 |
+
"f1_score": 0.7104171976414078,
|
405 |
+
"support": 10199,
|
406 |
+
"total_samples": 35658
|
407 |
+
},
|
408 |
+
"identity_hate": {
|
409 |
+
"threshold": 0.373469387755102,
|
410 |
+
"f1_score": 0.4444212159518569,
|
411 |
+
"support": 1878,
|
412 |
+
"total_samples": 35658
|
413 |
+
}
|
414 |
+
},
|
415 |
+
"per_language": {
|
416 |
+
"0": {
|
417 |
+
"toxic": {
|
418 |
+
"threshold": 0.4379310344827586,
|
419 |
+
"f1_score": 0.6362062357467935,
|
420 |
+
"support": 2228,
|
421 |
+
"total_samples": 4638
|
422 |
+
},
|
423 |
+
"severe_toxic": {
|
424 |
+
"threshold": 0.4241379310344827,
|
425 |
+
"f1_score": 0.6836346572759443,
|
426 |
+
"support": 199,
|
427 |
+
"total_samples": 4638
|
428 |
+
},
|
429 |
+
"obscene": {
|
430 |
+
"threshold": 0.4655172413793103,
|
431 |
+
"f1_score": 0.4812423489705398,
|
432 |
+
"support": 1235,
|
433 |
+
"total_samples": 4638
|
434 |
+
},
|
435 |
+
"threat": {
|
436 |
+
"threshold": 0.4655172413793103,
|
437 |
+
"f1_score": 0.560716193430073,
|
438 |
+
"support": 118,
|
439 |
+
"total_samples": 4638
|
440 |
+
},
|
441 |
+
"insult": {
|
442 |
+
"threshold": 0.6586206896551723,
|
443 |
+
"f1_score": 0.6797683196093679,
|
444 |
+
"support": 1144,
|
445 |
+
"total_samples": 4638
|
446 |
+
},
|
447 |
+
"identity_hate": {
|
448 |
+
"threshold": 0.6310344827586206,
|
449 |
+
"f1_score": 0.4653856089660791,
|
450 |
+
"support": 214,
|
451 |
+
"total_samples": 4638
|
452 |
+
}
|
453 |
+
},
|
454 |
+
"1": {
|
455 |
+
"toxic": {
|
456 |
+
"threshold": 0.38275862068965516,
|
457 |
+
"f1_score": 0.5653885349662379,
|
458 |
+
"support": 2589,
|
459 |
+
"total_samples": 5193
|
460 |
+
},
|
461 |
+
"severe_toxic": {
|
462 |
+
"threshold": 0.36896551724137927,
|
463 |
+
"f1_score": 0.6303988062940857,
|
464 |
+
"support": 245,
|
465 |
+
"total_samples": 5193
|
466 |
+
},
|
467 |
+
"obscene": {
|
468 |
+
"threshold": 0.6724137931034482,
|
469 |
+
"f1_score": 0.69776888519452,
|
470 |
+
"support": 1239,
|
471 |
+
"total_samples": 5193
|
472 |
+
},
|
473 |
+
"threat": {
|
474 |
+
"threshold": 0.5482758620689655,
|
475 |
+
"f1_score": 0.49444444444444446,
|
476 |
+
"support": 106,
|
477 |
+
"total_samples": 5193
|
478 |
+
},
|
479 |
+
"insult": {
|
480 |
+
"threshold": 0.45172413793103444,
|
481 |
+
"f1_score": 0.43592427815977264,
|
482 |
+
"support": 1514,
|
483 |
+
"total_samples": 5193
|
484 |
+
},
|
485 |
+
"identity_hate": {
|
486 |
+
"threshold": 0.603448275862069,
|
487 |
+
"f1_score": 0.437278850182076,
|
488 |
+
"support": 279,
|
489 |
+
"total_samples": 5193
|
490 |
+
}
|
491 |
+
},
|
492 |
+
"2": {
|
493 |
+
"toxic": {
|
494 |
+
"threshold": 0.36896551724137927,
|
495 |
+
"f1_score": 0.5636259188109024,
|
496 |
+
"support": 2585,
|
497 |
+
"total_samples": 5164
|
498 |
+
},
|
499 |
+
"severe_toxic": {
|
500 |
+
"threshold": 0.396551724137931,
|
501 |
+
"f1_score": 0.6242565552619788,
|
502 |
+
"support": 243,
|
503 |
+
"total_samples": 5164
|
504 |
+
},
|
505 |
+
"obscene": {
|
506 |
+
"threshold": 0.6310344827586206,
|
507 |
+
"f1_score": 0.609064783177638,
|
508 |
+
"support": 1233,
|
509 |
+
"total_samples": 5164
|
510 |
+
},
|
511 |
+
"threat": {
|
512 |
+
"threshold": 0.6862068965517241,
|
513 |
+
"f1_score": 0.4331632653061225,
|
514 |
+
"support": 110,
|
515 |
+
"total_samples": 5164
|
516 |
+
},
|
517 |
+
"insult": {
|
518 |
+
"threshold": 0.6586206896551723,
|
519 |
+
"f1_score": 0.5919194590653671,
|
520 |
+
"support": 1514,
|
521 |
+
"total_samples": 5164
|
522 |
+
},
|
523 |
+
"identity_hate": {
|
524 |
+
"threshold": 0.5896551724137931,
|
525 |
+
"f1_score": 0.44181963497241983,
|
526 |
+
"support": 282,
|
527 |
+
"total_samples": 5164
|
528 |
+
}
|
529 |
+
},
|
530 |
+
"3": {
|
531 |
+
"toxic": {
|
532 |
+
"threshold": 0.35517241379310344,
|
533 |
+
"f1_score": 0.5733103161693534,
|
534 |
+
"support": 2579,
|
535 |
+
"total_samples": 5168
|
536 |
+
},
|
537 |
+
"severe_toxic": {
|
538 |
+
"threshold": 0.38275862068965516,
|
539 |
+
"f1_score": 0.6597492750378473,
|
540 |
+
"support": 243,
|
541 |
+
"total_samples": 5168
|
542 |
+
},
|
543 |
+
"obscene": {
|
544 |
+
"threshold": 0.5896551724137931,
|
545 |
+
"f1_score": 0.5803338639295222,
|
546 |
+
"support": 1234,
|
547 |
+
"total_samples": 5168
|
548 |
+
},
|
549 |
+
"threat": {
|
550 |
+
"threshold": 0.5896551724137931,
|
551 |
+
"f1_score": 0.5531975271105706,
|
552 |
+
"support": 108,
|
553 |
+
"total_samples": 5168
|
554 |
+
},
|
555 |
+
"insult": {
|
556 |
+
"threshold": 0.4103448275862069,
|
557 |
+
"f1_score": 0.43932768516388326,
|
558 |
+
"support": 1511,
|
559 |
+
"total_samples": 5168
|
560 |
+
},
|
561 |
+
"identity_hate": {
|
562 |
+
"threshold": 0.5482758620689655,
|
563 |
+
"f1_score": 0.5223443223443224,
|
564 |
+
"support": 276,
|
565 |
+
"total_samples": 5168
|
566 |
+
}
|
567 |
+
},
|
568 |
+
"4": {
|
569 |
+
"toxic": {
|
570 |
+
"threshold": 0.36896551724137927,
|
571 |
+
"f1_score": 0.5671790360963849,
|
572 |
+
"support": 2568,
|
573 |
+
"total_samples": 5157
|
574 |
+
},
|
575 |
+
"severe_toxic": {
|
576 |
+
"threshold": 0.4241379310344827,
|
577 |
+
"f1_score": 0.6449236298292902,
|
578 |
+
"support": 240,
|
579 |
+
"total_samples": 5157
|
580 |
+
},
|
581 |
+
"obscene": {
|
582 |
+
"threshold": 0.5896551724137931,
|
583 |
+
"f1_score": 0.5763915317957939,
|
584 |
+
"support": 1225,
|
585 |
+
"total_samples": 5157
|
586 |
+
},
|
587 |
+
"threat": {
|
588 |
+
"threshold": 0.5482758620689655,
|
589 |
+
"f1_score": 0.5202898550724637,
|
590 |
+
"support": 105,
|
591 |
+
"total_samples": 5157
|
592 |
+
},
|
593 |
+
"insult": {
|
594 |
+
"threshold": 0.45172413793103444,
|
595 |
+
"f1_score": 0.44168323420099964,
|
596 |
+
"support": 1501,
|
597 |
+
"total_samples": 5157
|
598 |
+
},
|
599 |
+
"identity_hate": {
|
600 |
+
"threshold": 0.5344827586206896,
|
601 |
+
"f1_score": 0.3050612442147916,
|
602 |
+
"support": 273,
|
603 |
+
"total_samples": 5157
|
604 |
+
}
|
605 |
+
},
|
606 |
+
"5": {
|
607 |
+
"toxic": {
|
608 |
+
"threshold": 0.38275862068965516,
|
609 |
+
"f1_score": 0.5689208863252881,
|
610 |
+
"support": 2572,
|
611 |
+
"total_samples": 5146
|
612 |
+
},
|
613 |
+
"severe_toxic": {
|
614 |
+
"threshold": 0.38275862068965516,
|
615 |
+
"f1_score": 0.6483406115143644,
|
616 |
+
"support": 242,
|
617 |
+
"total_samples": 5146
|
618 |
+
},
|
619 |
+
"obscene": {
|
620 |
+
"threshold": 0.6172413793103448,
|
621 |
+
"f1_score": 0.7591744574190955,
|
622 |
+
"support": 1227,
|
623 |
+
"total_samples": 5146
|
624 |
+
},
|
625 |
+
"threat": {
|
626 |
+
"threshold": 0.5896551724137931,
|
627 |
+
"f1_score": 0.48909813468905516,
|
628 |
+
"support": 106,
|
629 |
+
"total_samples": 5146
|
630 |
+
},
|
631 |
+
"insult": {
|
632 |
+
"threshold": 0.4655172413793103,
|
633 |
+
"f1_score": 0.4438765689644482,
|
634 |
+
"support": 1506,
|
635 |
+
"total_samples": 5146
|
636 |
+
},
|
637 |
+
"identity_hate": {
|
638 |
+
"threshold": 0.4655172413793103,
|
639 |
+
"f1_score": 0.57592394533571,
|
640 |
+
"support": 277,
|
641 |
+
"total_samples": 5146
|
642 |
+
}
|
643 |
+
},
|
644 |
+
"6": {
|
645 |
+
"toxic": {
|
646 |
+
"threshold": 0.396551724137931,
|
647 |
+
"f1_score": 0.5707684299142913,
|
648 |
+
"support": 2576,
|
649 |
+
"total_samples": 5192
|
650 |
+
},
|
651 |
+
"severe_toxic": {
|
652 |
+
"threshold": 0.38275862068965516,
|
653 |
+
"f1_score": 0.6300280234278585,
|
654 |
+
"support": 243,
|
655 |
+
"total_samples": 5192
|
656 |
+
},
|
657 |
+
"obscene": {
|
658 |
+
"threshold": 0.603448275862069,
|
659 |
+
"f1_score": 0.5508854395728676,
|
660 |
+
"support": 1233,
|
661 |
+
"total_samples": 5192
|
662 |
+
},
|
663 |
+
"threat": {
|
664 |
+
"threshold": 0.4655172413793103,
|
665 |
+
"f1_score": 0.6029992790194665,
|
666 |
+
"support": 107,
|
667 |
+
"total_samples": 5192
|
668 |
+
},
|
669 |
+
"insult": {
|
670 |
+
"threshold": 0.4241379310344827,
|
671 |
+
"f1_score": 0.4434943555473952,
|
672 |
+
"support": 1509,
|
673 |
+
"total_samples": 5192
|
674 |
+
},
|
675 |
+
"identity_hate": {
|
676 |
+
"threshold": 0.6586206896551723,
|
677 |
+
"f1_score": 0.4569864410513042,
|
678 |
+
"support": 277,
|
679 |
+
"total_samples": 5192
|
680 |
+
}
|
681 |
+
}
|
682 |
+
}
|
683 |
+
}
|
684 |
+
}
|
evaluation_results/eval_20250401_143401/plots/per_class_comparison.png
ADDED
![]() |
evaluation_results/eval_20250401_143401/plots/roc_all_classes.png
ADDED
![]() |
Git LFS Details
|
evaluation_results/eval_20250401_143401/plots/roc_by_language.png
ADDED
![]() |
Git LFS Details
|