Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +48 -0
- .env.template +9 -0
- .gitattributes +27 -35
- .gitignore +83 -0
- Dockerfile +29 -0
- analysis/analysis.txt +264 -0
- analysis/analyze_lang_distribution.py +336 -0
- analysis/compute_class_weights.py +499 -0
- analysis/plot_loss_curves.py +374 -0
- analysis/plot_roc_curves.py +163 -0
- app.py +262 -0
- augmentation/balance_english.py +237 -0
- augmentation/threat_augment.py +379 -0
- augmentation/toxic_augment.py +439 -0
- datacard.md +39 -0
- docker-compose.yml +13 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_identity_hate.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_insult.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_obscene.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_severe_toxic.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_threat.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_0.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_1.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_2.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_3.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_4.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_5.png +0 -0
- evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_6.png +0 -0
- evaluation_results/eval_20250208_161149/eval_params.json +7 -0
- evaluation_results/eval_20250208_161149/evaluation_results.json +2020 -0
- evaluation_results/eval_20250208_161149/plots/calibration_0.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_1.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_2.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_3.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_4.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_5.png +3 -0
- evaluation_results/eval_20250208_161149/plots/calibration_6.png +3 -0
- evaluation_results/eval_20250208_161149/plots/class_calibration.png +3 -0
- evaluation_results/eval_20250208_161149/plots/language_performance.png +0 -0
- evaluation_results/eval_20250208_161149/plots/metric_correlations.png +0 -0
- evaluation_results/eval_20250208_161149/plots/overall_calibration.png +0 -0
- evaluation_results/eval_20250208_161149/plots/performance_distributions.png +0 -0
- evaluation_results/eval_20250208_161149/predictions.npz +3 -0
- evaluation_results/eval_20250208_161149/thresholds.json +58 -0
- evaluation_results/eval_20250401_143401/eval_params.json +21 -0
- evaluation_results/eval_20250401_143401/evaluation_results.json +684 -0
- evaluation_results/eval_20250401_143401/plots/per_class_comparison.png +0 -0
- evaluation_results/eval_20250401_143401/plots/roc_all_classes.png +3 -0
- evaluation_results/eval_20250401_143401/plots/roc_by_language.png +3 -0
.dockerignore
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
env/
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
|
| 28 |
+
# Virtual Environment
|
| 29 |
+
venv/
|
| 30 |
+
ENV/
|
| 31 |
+
|
| 32 |
+
# IDE
|
| 33 |
+
.idea/
|
| 34 |
+
.vscode/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
|
| 38 |
+
# Project specific
|
| 39 |
+
dataset/
|
| 40 |
+
weights/
|
| 41 |
+
wandb/
|
| 42 |
+
*.pt
|
| 43 |
+
*.pth
|
| 44 |
+
*.ckpt
|
| 45 |
+
|
| 46 |
+
# Logs
|
| 47 |
+
*.log
|
| 48 |
+
logs/
|
.env.template
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Weights & Biases API Key
|
| 2 |
+
WANDB_API_KEY=
|
| 3 |
+
|
| 4 |
+
# Model Configuration
|
| 5 |
+
BATCH_SIZE=16
|
| 6 |
+
GRAD_ACCUM_STEPS=4
|
| 7 |
+
EPOCHS=5
|
| 8 |
+
LEARNING_RATE=2e-5
|
| 9 |
+
MODEL_NAME=xlm-roberta-large
|
.gitattributes
CHANGED
|
@@ -1,35 +1,27 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
dataset/raw/MULTILINGUAL_TOXIC_DATASET_360K_7LANG.csv filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
dataset/raw/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_binary.csv filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
dataset/split/train.csv filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
dataset/processed/MULTILINGUAL_TOXIC_DATASET_AUGMENTED.csv filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
evaluation_results/eval_20250208_161149/plots/calibration_0.png filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
evaluation_results/eval_20250208_161149/plots/calibration_1.png filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
evaluation_results/eval_20250208_161149/plots/calibration_2.png filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
evaluation_results/eval_20250208_161149/plots/calibration_3.png filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
evaluation_results/eval_20250208_161149/plots/calibration_4.png filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
evaluation_results/eval_20250208_161149/plots/calibration_5.png filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
evaluation_results/eval_20250208_161149/plots/calibration_6.png filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
evaluation_results/eval_20250208_161149/plots/class_calibration.png filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
evaluation_results/eval_20250208_161149/predictions.npz filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
evaluation_results/eval_20250401_143401/plots/roc_all_classes.png filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
evaluation_results/eval_20250401_143401/plots/roc_by_language.png filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
evaluation_results/eval_20250401_143401/plots/roc_identity_hate.png filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
evaluation_results/eval_20250401_143401/plots/roc_insult.png filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
evaluation_results/eval_20250401_143401/plots/roc_obscene.png filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
evaluation_results/eval_20250401_143401/plots/roc_severe_toxic.png filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
evaluation_results/eval_20250401_143401/plots/roc_threat.png filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
evaluation_results/eval_20250401_143401/plots/roc_toxic.png filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
evaluation_results/eval_20250401_143401/predictions.npz filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
images/class_distribution.png filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
images/language_distribution.png filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
images/toxicity_by_language.png filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
images/toxicity_correlation.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
|
| 5 |
+
# Virtual environment
|
| 6 |
+
venv/
|
| 7 |
+
ENV/
|
| 8 |
+
env/
|
| 9 |
+
env.bak/
|
| 10 |
+
venv.bak/
|
| 11 |
+
|
| 12 |
+
# Gradio
|
| 13 |
+
.gradio/*
|
| 14 |
+
|
| 15 |
+
# Weights and Biases
|
| 16 |
+
weights/*
|
| 17 |
+
dataset/*
|
| 18 |
+
cache/*
|
| 19 |
+
wandb/*
|
| 20 |
+
|
| 21 |
+
# IDE and editor files
|
| 22 |
+
.idea/
|
| 23 |
+
.vscode/
|
| 24 |
+
*.swp
|
| 25 |
+
*.swo
|
| 26 |
+
|
| 27 |
+
# Jupyter Notebook checkpoints
|
| 28 |
+
.ipynb_checkpoints/
|
| 29 |
+
|
| 30 |
+
# Distribution / packaging
|
| 31 |
+
.Python
|
| 32 |
+
build/
|
| 33 |
+
develop-eggs/
|
| 34 |
+
dist/
|
| 35 |
+
downloads/
|
| 36 |
+
eggs/
|
| 37 |
+
lib/
|
| 38 |
+
lib64/
|
| 39 |
+
parts/
|
| 40 |
+
sdist/
|
| 41 |
+
var/
|
| 42 |
+
wheels/
|
| 43 |
+
share/python-wheels/
|
| 44 |
+
*.egg-info/
|
| 45 |
+
.installed.cfg
|
| 46 |
+
*.egg
|
| 47 |
+
|
| 48 |
+
# Pytest
|
| 49 |
+
.cache/
|
| 50 |
+
nosetests.xml
|
| 51 |
+
coverage.xml
|
| 52 |
+
*.cover
|
| 53 |
+
*.py,cover
|
| 54 |
+
.hypothesis/
|
| 55 |
+
.pytest_cache/
|
| 56 |
+
|
| 57 |
+
# mypy
|
| 58 |
+
.mypy_cache/
|
| 59 |
+
.dmypy.json
|
| 60 |
+
dmypy.json
|
| 61 |
+
|
| 62 |
+
# pyenv
|
| 63 |
+
.python-version
|
| 64 |
+
|
| 65 |
+
# pipenv
|
| 66 |
+
Pipfile.lock
|
| 67 |
+
|
| 68 |
+
# pyre type checker
|
| 69 |
+
.pyre/
|
| 70 |
+
|
| 71 |
+
# C extensions
|
| 72 |
+
*.so
|
| 73 |
+
|
| 74 |
+
# Backup files
|
| 75 |
+
*~
|
| 76 |
+
*.bak
|
| 77 |
+
*.tmp
|
| 78 |
+
|
| 79 |
+
#Logging
|
| 80 |
+
*.log
|
| 81 |
+
logs/
|
| 82 |
+
|
| 83 |
+
*.csv
|
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use CUDA-enabled PyTorch base image
|
| 2 |
+
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
git \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy requirements file
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy project files
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
# Create directories for data and models
|
| 22 |
+
RUN mkdir -p dataset/final_balanced weights
|
| 23 |
+
|
| 24 |
+
# Set environment variables
|
| 25 |
+
ENV PYTHONPATH=/app
|
| 26 |
+
ENV WANDB_API_KEY=""
|
| 27 |
+
|
| 28 |
+
# Default command to run training
|
| 29 |
+
CMD ["python", "model/train.py"]
|
analysis/analysis.txt
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
(venv) PS V:\Deeptanshu Lal\PROJECTS\Toxic Comment Classification> python .\analysis\analyze_lang_distribution.py
|
| 2 |
+
Reading dataset...
|
| 3 |
+
|
| 4 |
+
Dataset Overview:
|
| 5 |
+
--------------------------------------------------
|
| 6 |
+
Total number of comments: 361,228
|
| 7 |
+
Number of languages: 7
|
| 8 |
+
|
| 9 |
+
Language Distribution:
|
| 10 |
+
--------------------------------------------------
|
| 11 |
+
ru: 52,632 comments (14.57%)
|
| 12 |
+
tr: 52,558 comments (14.55%)
|
| 13 |
+
pt: 52,440 comments (14.52%)
|
| 14 |
+
es: 52,412 comments (14.51%)
|
| 15 |
+
fr: 52,368 comments (14.50%)
|
| 16 |
+
it: 52,340 comments (14.49%)
|
| 17 |
+
en: 46,478 comments (12.87%)
|
| 18 |
+
|
| 19 |
+
Class Distribution by Language:
|
| 20 |
+
--------------------------------------------------
|
| 21 |
+
|
| 22 |
+
RU (Total: 52,632 comments)
|
| 23 |
+
0 toxic classes: 26,316 (50.00%)
|
| 24 |
+
1 toxic classes: 7,688 (14.61%)
|
| 25 |
+
2 toxic classes: 8,010 (15.22%)
|
| 26 |
+
3 toxic classes: 7,103 (13.50%)
|
| 27 |
+
4 toxic classes: 2,740 (5.21%)
|
| 28 |
+
5 toxic classes: 706 (1.34%)
|
| 29 |
+
6 toxic classes: 69 (0.13%)
|
| 30 |
+
|
| 31 |
+
TR (Total: 52,558 comments)
|
| 32 |
+
0 toxic classes: 26,279 (50.00%)
|
| 33 |
+
1 toxic classes: 7,677 (14.61%)
|
| 34 |
+
2 toxic classes: 8,004 (15.23%)
|
| 35 |
+
3 toxic classes: 7,088 (13.49%)
|
| 36 |
+
4 toxic classes: 2,736 (5.21%)
|
| 37 |
+
5 toxic classes: 705 (1.34%)
|
| 38 |
+
6 toxic classes: 69 (0.13%)
|
| 39 |
+
|
| 40 |
+
PT (Total: 52,440 comments)
|
| 41 |
+
0 toxic classes: 26,220 (50.00%)
|
| 42 |
+
1 toxic classes: 7,668 (14.62%)
|
| 43 |
+
2 toxic classes: 7,977 (15.21%)
|
| 44 |
+
3 toxic classes: 7,071 (13.48%)
|
| 45 |
+
4 toxic classes: 2,732 (5.21%)
|
| 46 |
+
5 toxic classes: 703 (1.34%)
|
| 47 |
+
6 toxic classes: 69 (0.13%)
|
| 48 |
+
|
| 49 |
+
ES (Total: 52,412 comments)
|
| 50 |
+
0 toxic classes: 26,206 (50.00%)
|
| 51 |
+
1 toxic classes: 7,647 (14.59%)
|
| 52 |
+
2 toxic classes: 7,982 (15.23%)
|
| 53 |
+
3 toxic classes: 7,069 (13.49%)
|
| 54 |
+
4 toxic classes: 2,737 (5.22%)
|
| 55 |
+
5 toxic classes: 702 (1.34%)
|
| 56 |
+
6 toxic classes: 69 (0.13%)
|
| 57 |
+
|
| 58 |
+
FR (Total: 52,368 comments)
|
| 59 |
+
0 toxic classes: 26,184 (50.00%)
|
| 60 |
+
1 toxic classes: 7,626 (14.56%)
|
| 61 |
+
2 toxic classes: 7,990 (15.26%)
|
| 62 |
+
3 toxic classes: 7,066 (13.49%)
|
| 63 |
+
4 toxic classes: 2,728 (5.21%)
|
| 64 |
+
5 toxic classes: 705 (1.35%)
|
| 65 |
+
6 toxic classes: 69 (0.13%)
|
| 66 |
+
|
| 67 |
+
IT (Total: 52,340 comments)
|
| 68 |
+
0 toxic classes: 26,170 (50.00%)
|
| 69 |
+
1 toxic classes: 7,652 (14.62%)
|
| 70 |
+
2 toxic classes: 7,967 (15.22%)
|
| 71 |
+
3 toxic classes: 7,057 (13.48%)
|
| 72 |
+
4 toxic classes: 2,722 (5.20%)
|
| 73 |
+
5 toxic classes: 703 (1.34%)
|
| 74 |
+
6 toxic classes: 69 (0.13%)
|
| 75 |
+
|
| 76 |
+
EN (Total: 46,478 comments)
|
| 77 |
+
0 toxic classes: 22,989 (49.46%)
|
| 78 |
+
1 toxic classes: 8,499 (18.29%)
|
| 79 |
+
2 toxic classes: 5,604 (12.06%)
|
| 80 |
+
3 toxic classes: 6,391 (13.75%)
|
| 81 |
+
4 toxic classes: 2,395 (5.15%)
|
| 82 |
+
5 toxic classes: 553 (1.19%)
|
| 83 |
+
6 toxic classes: 47 (0.10%)
|
| 84 |
+
|
| 85 |
+
Detailed Toxicity Analysis by Language:
|
| 86 |
+
--------------------------------------------------
|
| 87 |
+
|
| 88 |
+
RU (Total: 52,632 comments)
|
| 89 |
+
- Toxic:
|
| 90 |
+
Count: 25,954 (49.31%)
|
| 91 |
+
95% CI: [48.89%, 49.74%]
|
| 92 |
+
- Severe Toxic:
|
| 93 |
+
Count: 2,441 (4.64%)
|
| 94 |
+
95% CI: [4.46%, 4.82%]
|
| 95 |
+
- Obscene:
|
| 96 |
+
Count: 12,432 (23.62%)
|
| 97 |
+
95% CI: [23.26%, 23.98%]
|
| 98 |
+
- Threat:
|
| 99 |
+
Count: 1,075 (2.04%)
|
| 100 |
+
95% CI: [1.92%, 2.16%]
|
| 101 |
+
- Insult:
|
| 102 |
+
Count: 15,207 (28.89%)
|
| 103 |
+
95% CI: [28.51%, 29.28%]
|
| 104 |
+
- Identity Hate:
|
| 105 |
+
Count: 2,812 (5.34%)
|
| 106 |
+
95% CI: [5.15%, 5.53%]
|
| 107 |
+
|
| 108 |
+
TR (Total: 52,558 comments)
|
| 109 |
+
- Toxic:
|
| 110 |
+
Count: 25,908 (49.29%)
|
| 111 |
+
95% CI: [48.87%, 49.72%]
|
| 112 |
+
- Severe Toxic:
|
| 113 |
+
Count: 2,439 (4.64%)
|
| 114 |
+
95% CI: [4.46%, 4.82%]
|
| 115 |
+
- Obscene:
|
| 116 |
+
Count: 12,411 (23.61%)
|
| 117 |
+
95% CI: [23.25%, 23.98%]
|
| 118 |
+
- Threat:
|
| 119 |
+
Count: 1,077 (2.05%)
|
| 120 |
+
95% CI: [1.93%, 2.17%]
|
| 121 |
+
- Insult:
|
| 122 |
+
Count: 15,170 (28.86%)
|
| 123 |
+
95% CI: [28.48%, 29.25%]
|
| 124 |
+
- Identity Hate:
|
| 125 |
+
Count: 2,827 (5.38%)
|
| 126 |
+
95% CI: [5.19%, 5.57%]
|
| 127 |
+
|
| 128 |
+
PT (Total: 52,440 comments)
|
| 129 |
+
- Toxic:
|
| 130 |
+
Count: 25,841 (49.28%)
|
| 131 |
+
95% CI: [48.85%, 49.71%]
|
| 132 |
+
- Severe Toxic:
|
| 133 |
+
Count: 2,432 (4.64%)
|
| 134 |
+
95% CI: [4.46%, 4.82%]
|
| 135 |
+
- Obscene:
|
| 136 |
+
Count: 12,395 (23.64%)
|
| 137 |
+
95% CI: [23.27%, 24.00%]
|
| 138 |
+
- Threat:
|
| 139 |
+
Count: 1,080 (2.06%)
|
| 140 |
+
95% CI: [1.94%, 2.18%]
|
| 141 |
+
- Insult:
|
| 142 |
+
Count: 15,143 (28.88%)
|
| 143 |
+
95% CI: [28.49%, 29.26%]
|
| 144 |
+
- Identity Hate:
|
| 145 |
+
Count: 2,801 (5.34%)
|
| 146 |
+
95% CI: [5.15%, 5.53%]
|
| 147 |
+
|
| 148 |
+
ES (Total: 52,412 comments)
|
| 149 |
+
- Toxic:
|
| 150 |
+
Count: 25,874 (49.37%)
|
| 151 |
+
95% CI: [48.94%, 49.79%]
|
| 152 |
+
- Severe Toxic:
|
| 153 |
+
Count: 2,432 (4.64%)
|
| 154 |
+
95% CI: [4.46%, 4.82%]
|
| 155 |
+
- Obscene:
|
| 156 |
+
Count: 12,388 (23.64%)
|
| 157 |
+
95% CI: [23.27%, 24.00%]
|
| 158 |
+
- Threat:
|
| 159 |
+
Count: 1,073 (2.05%)
|
| 160 |
+
95% CI: [1.93%, 2.17%]
|
| 161 |
+
- Insult:
|
| 162 |
+
Count: 15,140 (28.89%)
|
| 163 |
+
95% CI: [28.50%, 29.27%]
|
| 164 |
+
- Identity Hate:
|
| 165 |
+
Count: 2,783 (5.31%)
|
| 166 |
+
95% CI: [5.12%, 5.50%]
|
| 167 |
+
|
| 168 |
+
FR (Total: 52,368 comments)
|
| 169 |
+
- Toxic:
|
| 170 |
+
Count: 25,877 (49.41%)
|
| 171 |
+
95% CI: [48.99%, 49.84%]
|
| 172 |
+
- Severe Toxic:
|
| 173 |
+
Count: 2,428 (4.64%)
|
| 174 |
+
95% CI: [4.46%, 4.82%]
|
| 175 |
+
- Obscene:
|
| 176 |
+
Count: 12,379 (23.64%)
|
| 177 |
+
95% CI: [23.27%, 24.00%]
|
| 178 |
+
- Threat:
|
| 179 |
+
Count: 1,066 (2.04%)
|
| 180 |
+
95% CI: [1.91%, 2.16%]
|
| 181 |
+
- Insult:
|
| 182 |
+
Count: 15,131 (28.89%)
|
| 183 |
+
95% CI: [28.51%, 29.28%]
|
| 184 |
+
- Identity Hate:
|
| 185 |
+
Count: 2,774 (5.30%)
|
| 186 |
+
95% CI: [5.11%, 5.49%]
|
| 187 |
+
|
| 188 |
+
IT (Total: 52,340 comments)
|
| 189 |
+
- Toxic:
|
| 190 |
+
Count: 25,827 (49.34%)
|
| 191 |
+
95% CI: [48.92%, 49.77%]
|
| 192 |
+
- Severe Toxic:
|
| 193 |
+
Count: 2,429 (4.64%)
|
| 194 |
+
95% CI: [4.46%, 4.82%]
|
| 195 |
+
- Obscene:
|
| 196 |
+
Count: 12,341 (23.58%)
|
| 197 |
+
95% CI: [23.21%, 23.94%]
|
| 198 |
+
- Threat:
|
| 199 |
+
Count: 1,077 (2.06%)
|
| 200 |
+
95% CI: [1.94%, 2.18%]
|
| 201 |
+
- Insult:
|
| 202 |
+
Count: 15,118 (28.88%)
|
| 203 |
+
95% CI: [28.50%, 29.27%]
|
| 204 |
+
- Identity Hate:
|
| 205 |
+
Count: 2,782 (5.32%)
|
| 206 |
+
95% CI: [5.12%, 5.51%]
|
| 207 |
+
|
| 208 |
+
EN (Total: 46,478 comments)
|
| 209 |
+
- Toxic:
|
| 210 |
+
Count: 22,343 (48.07%)
|
| 211 |
+
95% CI: [47.62%, 48.53%]
|
| 212 |
+
- Severe Toxic:
|
| 213 |
+
Count: 1,986 (4.27%)
|
| 214 |
+
95% CI: [4.09%, 4.46%]
|
| 215 |
+
- Obscene:
|
| 216 |
+
Count: 12,356 (26.58%)
|
| 217 |
+
95% CI: [26.18%, 26.99%]
|
| 218 |
+
- Threat:
|
| 219 |
+
Count: 1,204 (2.59%)
|
| 220 |
+
95% CI: [2.45%, 2.73%]
|
| 221 |
+
- Insult:
|
| 222 |
+
Count: 11,475 (24.69%)
|
| 223 |
+
95% CI: [24.30%, 25.08%]
|
| 224 |
+
- Identity Hate:
|
| 225 |
+
Count: 2,143 (4.61%)
|
| 226 |
+
95% CI: [4.42%, 4.80%]
|
| 227 |
+
|
| 228 |
+
Statistical Analysis:
|
| 229 |
+
--------------------------------------------------
|
| 230 |
+
|
| 231 |
+
Chi-square test for number of toxic classes by language:
|
| 232 |
+
Chi-square statistic: 654.28
|
| 233 |
+
p-value: 0.0000000000
|
| 234 |
+
Significant at α=0.05: Yes
|
| 235 |
+
|
| 236 |
+
Chi-square test for Toxic:
|
| 237 |
+
Chi-square statistic: 26.10
|
| 238 |
+
p-value: 0.0002136602
|
| 239 |
+
Significant at α=0.05: Yes
|
| 240 |
+
|
| 241 |
+
Chi-square test for Severe Toxic:
|
| 242 |
+
Chi-square statistic: 12.38
|
| 243 |
+
p-value: 0.0540052211
|
| 244 |
+
Significant at α=0.05: No
|
| 245 |
+
|
| 246 |
+
Chi-square test for Obscene:
|
| 247 |
+
Chi-square statistic: 195.12
|
| 248 |
+
p-value: 0.0000000000
|
| 249 |
+
Significant at α=0.05: Yes
|
| 250 |
+
|
| 251 |
+
Chi-square test for Threat:
|
| 252 |
+
Chi-square statistic: 57.45
|
| 253 |
+
p-value: 0.0000000001
|
| 254 |
+
Significant at α=0.05: Yes
|
| 255 |
+
|
| 256 |
+
Chi-square test for Insult:
|
| 257 |
+
Chi-square statistic: 350.72
|
| 258 |
+
p-value: 0.0000000000
|
| 259 |
+
Significant at α=0.05: Yes
|
| 260 |
+
|
| 261 |
+
Chi-square test for Identity Hate:
|
| 262 |
+
Chi-square statistic: 42.77
|
| 263 |
+
p-value: 0.0000001295
|
| 264 |
+
Significant at α=0.05: Yes
|
analysis/analyze_lang_distribution.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
import numpy as np
|
| 5 |
+
from scipy import stats
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
def set_style():
|
| 9 |
+
"""Set the style for all plots"""
|
| 10 |
+
# Use a basic style instead of seaborn
|
| 11 |
+
plt.style.use('default')
|
| 12 |
+
|
| 13 |
+
# Custom style settings
|
| 14 |
+
plt.rcParams['figure.figsize'] = (12, 6)
|
| 15 |
+
plt.rcParams['font.size'] = 10
|
| 16 |
+
plt.rcParams['axes.titlesize'] = 14
|
| 17 |
+
plt.rcParams['axes.labelsize'] = 12
|
| 18 |
+
plt.rcParams['axes.grid'] = True
|
| 19 |
+
plt.rcParams['grid.alpha'] = 0.3
|
| 20 |
+
|
| 21 |
+
# Custom color palette
|
| 22 |
+
colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF99CC', '#99FFCC', '#FFB366']
|
| 23 |
+
return colors
|
| 24 |
+
|
| 25 |
+
def create_language_distribution_plot(df, lang_dist, lang_percent, colors, image_dir):
|
| 26 |
+
"""Create and save language distribution plot"""
|
| 27 |
+
plt.figure(figsize=(14, 8))
|
| 28 |
+
|
| 29 |
+
# Create bar positions
|
| 30 |
+
x = np.arange(len(lang_dist))
|
| 31 |
+
|
| 32 |
+
# Create bars with language names as x-ticks
|
| 33 |
+
bars = plt.bar(x, lang_dist.values, color=colors)
|
| 34 |
+
plt.title('Language Distribution in Multilingual Toxic Comment Dataset', pad=20)
|
| 35 |
+
plt.xlabel('Language', labelpad=10)
|
| 36 |
+
plt.ylabel('Number of Comments', labelpad=10)
|
| 37 |
+
|
| 38 |
+
# Set x-ticks to language names
|
| 39 |
+
plt.xticks(x, lang_dist.index, rotation=45)
|
| 40 |
+
|
| 41 |
+
# Add value labels on top of each bar with increased spacing
|
| 42 |
+
for i, bar in enumerate(bars):
|
| 43 |
+
height = bar.get_height()
|
| 44 |
+
plt.text(bar.get_x() + bar.get_width()/2., height + (max(lang_dist.values) * 0.01),
|
| 45 |
+
f'{int(height):,}\n({lang_percent.values[i]:.1f}%)',
|
| 46 |
+
ha='center', va='bottom', fontsize=10)
|
| 47 |
+
|
| 48 |
+
# Add some padding to the top of the plot
|
| 49 |
+
plt.margins(y=0.2)
|
| 50 |
+
|
| 51 |
+
plt.tight_layout()
|
| 52 |
+
plt.savefig(os.path.join(image_dir, 'language_distribution.png'), dpi=300, bbox_inches='tight')
|
| 53 |
+
plt.close()
|
| 54 |
+
|
| 55 |
+
def create_toxicity_heatmap(df, toxicity_cols, image_dir):
|
| 56 |
+
"""Create and save toxicity correlation heatmap"""
|
| 57 |
+
plt.figure(figsize=(12, 10))
|
| 58 |
+
|
| 59 |
+
# Calculate correlation and sort
|
| 60 |
+
correlation = df[toxicity_cols].corr()
|
| 61 |
+
|
| 62 |
+
# Sort correlation matrix by mean correlation value
|
| 63 |
+
mean_corr = correlation.mean()
|
| 64 |
+
sorted_cols = mean_corr.sort_values(ascending=False).index
|
| 65 |
+
correlation = correlation.loc[sorted_cols, sorted_cols]
|
| 66 |
+
|
| 67 |
+
# Create heatmap with better styling
|
| 68 |
+
im = plt.imshow(correlation, cmap='RdYlBu_r', aspect='equal', vmin=0, vmax=1)
|
| 69 |
+
plt.colorbar(im, label='Correlation Coefficient')
|
| 70 |
+
|
| 71 |
+
# Add text annotations with conditional formatting
|
| 72 |
+
for i in range(len(correlation)):
|
| 73 |
+
for j in range(len(correlation)):
|
| 74 |
+
corr_value = correlation.iloc[i, j]
|
| 75 |
+
# Choose text color based on background
|
| 76 |
+
text_color = 'white' if abs(corr_value) > 0.7 else 'black'
|
| 77 |
+
# Make diagonal elements bold
|
| 78 |
+
fontweight = 'bold' if i == j else 'normal'
|
| 79 |
+
plt.text(j, i, f'{corr_value:.2f}',
|
| 80 |
+
ha='center', va='center',
|
| 81 |
+
color=text_color,
|
| 82 |
+
fontweight=fontweight,
|
| 83 |
+
fontsize=10)
|
| 84 |
+
|
| 85 |
+
# Improve title and labels
|
| 86 |
+
plt.title('Correlation between Different Types of Toxicity\n(Sorted by Average Correlation)',
|
| 87 |
+
pad=20, fontsize=14)
|
| 88 |
+
|
| 89 |
+
# Format axis labels
|
| 90 |
+
formatted_labels = [col.replace('_', ' ').title() for col in correlation.columns]
|
| 91 |
+
plt.xticks(range(len(formatted_labels)), formatted_labels, rotation=45, ha='right')
|
| 92 |
+
plt.yticks(range(len(formatted_labels)), formatted_labels)
|
| 93 |
+
|
| 94 |
+
# Add gridlines
|
| 95 |
+
plt.grid(False)
|
| 96 |
+
|
| 97 |
+
# Adjust layout
|
| 98 |
+
plt.tight_layout()
|
| 99 |
+
plt.savefig(os.path.join(image_dir, 'toxicity_correlation.png'), dpi=300, bbox_inches='tight')
|
| 100 |
+
plt.close()
|
| 101 |
+
|
| 102 |
+
def create_toxicity_by_language_plot(df, lang_dist, toxicity_cols, colors, image_dir):
|
| 103 |
+
"""Create and save toxicity distribution by language plot"""
|
| 104 |
+
plt.figure(figsize=(15, 8))
|
| 105 |
+
|
| 106 |
+
x = np.arange(len(lang_dist.index))
|
| 107 |
+
width = 0.15
|
| 108 |
+
multiplier = 0
|
| 109 |
+
|
| 110 |
+
for attribute, color in zip(toxicity_cols, colors):
|
| 111 |
+
# Calculate percentage of toxic comments (any value > 0)
|
| 112 |
+
attribute_means = [(df[df['lang'] == lang][attribute] > 0).mean() * 100
|
| 113 |
+
for lang in lang_dist.index]
|
| 114 |
+
|
| 115 |
+
offset = width * multiplier
|
| 116 |
+
rects = plt.bar(x + offset, attribute_means, width,
|
| 117 |
+
label=attribute.replace('_', ' ').title(),
|
| 118 |
+
color=color, alpha=0.8)
|
| 119 |
+
|
| 120 |
+
# Add value labels on the bars
|
| 121 |
+
for rect in rects:
|
| 122 |
+
height = rect.get_height()
|
| 123 |
+
plt.text(rect.get_x() + rect.get_width()/2., height,
|
| 124 |
+
f'{height:.1f}%', ha='center', va='bottom', fontsize=8)
|
| 125 |
+
|
| 126 |
+
multiplier += 1
|
| 127 |
+
|
| 128 |
+
plt.xlabel('Language')
|
| 129 |
+
plt.ylabel('Percentage of Toxic Comments (%)')
|
| 130 |
+
plt.title('Distribution of Toxicity Types by Language')
|
| 131 |
+
plt.xticks(x + width * 2.5, lang_dist.index, rotation=45)
|
| 132 |
+
plt.legend(loc='upper right', bbox_to_anchor=(1, 1))
|
| 133 |
+
plt.grid(True, alpha=0.3)
|
| 134 |
+
|
| 135 |
+
plt.tight_layout()
|
| 136 |
+
plt.savefig(os.path.join(image_dir, 'toxicity_by_language.png'), dpi=300, bbox_inches='tight')
|
| 137 |
+
plt.close()
|
| 138 |
+
|
| 139 |
+
def create_class_distribution_plot(df, lang_dist, image_dir):
    """Create and save a stacked bar chart of non-toxic vs. toxic class
    percentages for each language.

    Args:
        df: DataFrame with a 'lang' column and the six toxicity columns.
        lang_dist: Per-language value counts; its index fixes the bar order.
        image_dir: Directory where 'class_distribution.png' is written.

    NOTE(review): the toxic segments of a stack can sum past 100% because a
    comment may carry several toxicity labels at once (multi-label data).
    """
    plt.figure(figsize=(16, 10))

    # Define toxicity columns and their human-readable display names
    toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    display_names = [col.replace('_', ' ').title() for col in toxicity_cols]

    # Per-language percentages: one list per language for the toxic classes,
    # plus the share of comments carrying no toxic flag at all.
    class_dist = {}
    non_toxic_dist = {}
    for lang in lang_dist.index:
        lang_df = df[df['lang'] == lang]
        total = len(lang_df)

        # Binary matrix of toxicity flags (any nonzero value counts as toxic)
        toxic_matrix = lang_df[toxicity_cols].astype(bool)

        # Comments with no toxic flags at all
        non_toxic_mask = ~toxic_matrix.any(axis=1)
        non_toxic_dist[lang] = (non_toxic_mask.sum() / total) * 100

        # Percentage of comments flagged for each toxicity type
        class_dist[lang] = [(toxic_matrix[col].sum() / total) * 100 for col in toxicity_cols]

    # Create stacked bar chart
    x = np.arange(len(lang_dist.index))

    # Color scheme with an extra slot for the non-toxic segment
    colors = plt.cm.Set3(np.linspace(0, 1, len(toxicity_cols) + 1))

    # Bottom layer of the stack: non-toxic comments
    non_toxic_values = [non_toxic_dist[lang] for lang in lang_dist.index]
    plt.bar(x, non_toxic_values, label='Non-Toxic', color=colors[0], alpha=0.9)

    # Percentage labels for non-toxic segments above 1%
    for j, v in enumerate(non_toxic_values):
        if v > 1:
            plt.text(x[j], v/2, f'{v:.1f}%',
                     ha='center', va='center',
                     color='black',
                     fontweight='bold',
                     fontsize=9)

    # Running top edge of the stack, updated after each toxic layer
    bottom = np.array(non_toxic_values)

    for i, (col, display_name) in enumerate(zip(toxicity_cols, display_names)):
        values = [class_dist[lang][i] for lang in lang_dist.index]
        plt.bar(x, values, bottom=bottom, label=display_name, color=colors[i+1], alpha=0.9)

        # Percentage labels for all segments above 1%.
        # Fix: removed a no-op conditional (`'black' if v > 10 else 'black'`)
        # that selected the same color in both branches, and the unused
        # accumulation of bar handles.
        for j, v in enumerate(values):
            if v > 1:
                plt.text(x[j], bottom[j] + v/2, f'{v:.1f}%',
                         ha='center', va='center',
                         color='black',
                         fontweight='bold',
                         fontsize=9)
        bottom = bottom + np.array(values)

    plt.xlabel('Language', labelpad=10, fontsize=12)
    plt.ylabel('Percentage of Comments', labelpad=10, fontsize=12)
    plt.title('Distribution of Non-Toxic and Toxic Comments by Language', pad=20, fontsize=14)
    plt.xticks(x, lang_dist.index, rotation=45, fontsize=10)

    # Legend outside the axes so it never overlaps the stacks
    plt.legend(title='Comment Types',
               bbox_to_anchor=(1.15, 1),
               loc='upper left',
               fontsize=10,
               title_fontsize=12)

    # Horizontal gridlines for readability
    plt.grid(True, axis='y', alpha=0.3)

    # Extra vertical headroom so segment labels are not clipped
    plt.margins(y=0.1)
    plt.tight_layout()
    plt.savefig(os.path.join(image_dir, 'class_distribution.png'), dpi=300, bbox_inches='tight')
    plt.close()
|
| 226 |
+
|
| 227 |
+
def analyze_language_distribution():
    """Analyze language distribution and toxicity patterns in the dataset.

    Reads 'dataset/split/train.csv', prints distribution and per-class
    statistics (with 95% normal-approximation confidence intervals and
    chi-square independence tests), and saves the plots produced by the
    plotting helpers into the 'images' directory.

    Requires the CSV to contain a 'lang' column plus the six toxicity
    columns listed below.
    """
    # Create images directory if it doesn't exist
    image_dir = 'images'
    os.makedirs(image_dir, exist_ok=True)

    # Set style and get color palette
    colors = set_style()

    # Read the dataset
    print("Reading dataset...")
    input_file = 'dataset/split/train.csv'
    df = pd.read_csv(input_file)

    # Get language distribution (counts and percentages share the same order)
    lang_dist = df['lang'].value_counts()
    lang_percent = df['lang'].value_counts(normalize=True) * 100

    # Print basic statistics
    print("\nDataset Overview:")
    print("-" * 50)
    print("Input file: ", input_file)
    print(f"Total number of comments: {len(df):,}")
    print(f"Number of languages: {df['lang'].nunique()}")

    print("\nLanguage Distribution:")
    print("-" * 50)
    for lang, count in lang_dist.items():
        print(f"{lang}: {count:,} comments ({lang_percent[lang]:.2f}%)")

    # Create language distribution plot
    create_language_distribution_plot(df, lang_dist, lang_percent, colors, image_dir)

    # Analyze toxicity
    toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Create correlation heatmap
    create_toxicity_heatmap(df, toxicity_cols, image_dir)

    # Create toxicity by language plot
    create_toxicity_by_language_plot(df, lang_dist, toxicity_cols, colors, image_dir)

    # Create class distribution plot
    create_class_distribution_plot(df, lang_dist, image_dir)

    # Print class distribution statistics
    print("\nClass Distribution by Language:")
    print("-" * 50)

    for lang in lang_dist.index:
        lang_df = df[df['lang'] == lang]
        total = len(lang_df)

        print(f"\n{lang.upper()} (Total: {total:,} comments)")

        # Count comments by number of toxic classes (0..6 labels per comment)
        toxic_counts = lang_df[toxicity_cols].astype(bool).sum(axis=1)
        class_dist = toxic_counts.value_counts().sort_index()

        for n_classes, count in class_dist.items():
            percentage = (count / total) * 100
            print(f"{n_classes} toxic classes: {count:,} ({percentage:.2f}%)")

    # Detailed toxicity analysis by language
    print("\nDetailed Toxicity Analysis by Language:")
    print("-" * 50)

    for lang in lang_dist.index:
        lang_df = df[df['lang'] == lang]
        print(f"\n{lang.upper()} (Total: {len(lang_df):,} comments)")

        # Calculate toxicity statistics
        for col in toxicity_cols:
            toxic_count = (lang_df[col] > 0).sum()
            toxic_percent = (toxic_count / len(lang_df)) * 100

            # Calculate confidence interval (Wald normal approximation;
            # NOTE(review): degenerates to zero width when the rate is 0% or 100%)
            ci = stats.norm.interval(0.95,
                                   loc=toxic_percent/100,
                                   scale=np.sqrt((toxic_percent/100 * (1-toxic_percent/100)) / len(lang_df)))
            ci_lower, ci_upper = ci[0] * 100, ci[1] * 100

            print(f"- {col.replace('_', ' ').title()}:")
            print(f"  Count: {toxic_count:,} ({toxic_percent:.2f}%)")
            print(f"  95% CI: [{ci_lower:.2f}%, {ci_upper:.2f}%]")

    # Statistical tests
    print("\nStatistical Analysis:")
    print("-" * 50)

    # Chi-square test for independence between language and number of toxic classes
    toxic_class_counts = pd.crosstab(df['lang'], df[toxicity_cols].astype(bool).sum(axis=1))
    chi2, p_value, _, _ = stats.chi2_contingency(toxic_class_counts)
    print("\nChi-square test for number of toxic classes by language:")
    print(f"Chi-square statistic: {chi2:.2f}")
    print(f"p-value: {p_value:.10f}")
    print(f"Significant at α=0.05: {'Yes' if p_value < 0.05 else 'No'}")

    # Chi-square test for each toxicity type (language vs. binary flag)
    for col in toxicity_cols:
        binary_col = (df[col] > 0).astype(int)
        contingency_table = pd.crosstab(df['lang'], binary_col)
        chi2, p_value, _, _ = stats.chi2_contingency(contingency_table)
        print(f"\nChi-square test for {col.replace('_', ' ').title()}:")
        print(f"Chi-square statistic: {chi2:.2f}")
        print(f"p-value: {p_value:.10f}")
        print(f"Significant at α=0.05: {'Yes' if p_value < 0.05 else 'No'}")

if __name__ == "__main__":
    analyze_language_distribution()
|
analysis/compute_class_weights.py
ADDED
|
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, List
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
# Configure logging: module-level setup so every helper in this script logs
# timestamped INFO-and-above messages to the root handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
|
| 12 |
+
|
| 13 |
+
def validate_parameters(params: Dict) -> Dict:
    """
    Validate weight calculation parameters to prevent dangerous combinations.

    Raises ValueError for out-of-range focal-loss settings or excessive
    weight scaling; logs warnings for risky-but-allowed combinations.
    Returns the (unmodified) params dict for chaining.
    """
    boost = params['boost_factor']
    cap = params['max_weight']
    gamma = params['gamma']
    alpha = params['alpha']

    # Hard failure: the effective positive-class weight would exceed the ceiling.
    if boost * cap > 30:
        raise ValueError(f"Dangerous weight scaling detected: boost_factor * max_weight = {boost * cap}")

    # Focal-loss parameters must stay inside their supported ranges.
    if not 0 < gamma <= 5.0:
        raise ValueError(f"Invalid gamma value: {gamma}. Must be in (0, 5.0]")

    if not 0 < alpha < 1:
        raise ValueError(f"Invalid alpha value: {alpha}. Must be in (0, 1)")

    # Soft warnings: combinations that are legal but potentially unstable.
    if gamma > 3.0 and boost > 1.5:
        logging.warning(f"Potentially unstable combination: high gamma ({gamma}) with high boost factor ({boost})")

    if alpha > 0.4 and boost > 1.5:
        logging.warning(f"Potentially unstable combination: high alpha ({alpha}) with high boost factor ({boost})")

    return params
|
| 37 |
+
|
| 38 |
+
def calculate_safe_weights(
    support_0: int,
    support_1: int,
    max_weight: float = 15.0,
    min_weight: float = 0.5,
    gamma: float = 2.0,
    alpha: float = 0.25,
    boost_factor: float = 1.0,
    num_classes: int = 6,
    lang: str = None,
    toxicity_type: str = None
) -> Dict[str, float]:
    """
    Calculate class weights with focal loss and adaptive scaling.
    Uses focal loss components for better handling of imbalanced classes
    while preserving language-specific adjustments.

    Args:
        support_0: Number of negative samples
        support_1: Number of positive samples
        max_weight: Maximum allowed weight
        min_weight: Minimum allowed weight
        gamma: Focal loss gamma parameter for down-weighting easy examples
        alpha: Focal loss alpha parameter for balancing positive/negative classes
        boost_factor: Optional boost for specific classes
        num_classes: Number of toxicity classes (default=6; currently unused,
            kept for interface compatibility)
        lang: Language code for language-specific constraints
        toxicity_type: Type of toxicity for class-specific constraints

    Returns:
        Dict with class weights ("0", "1"), supports, the unclamped
        "raw_weight_1", and a "calculation_metadata" sub-dict.

    Raises:
        ValueError: If either support count is negative.
    """
    # Input validation with detailed error messages
    if support_0 < 0 or support_1 < 0:
        raise ValueError(f"Negative sample counts: support_0={support_0}, support_1={support_1}")

    eps = 1e-7  # Small epsilon for numerical stability
    total = support_0 + support_1 + eps

    # Handle empty dataset case
    if total <= eps:
        logging.warning(f"Empty dataset for {toxicity_type} in {lang}")
        return {
            "0": 1.0,
            "1": 1.0,
            "support_0": support_0,
            "support_1": support_1,
            "raw_weight_1": 1.0,
            "calculation_metadata": {
                "formula": "default_weights_empty_dataset",
                "constraints_applied": ["empty_dataset_fallback"]
            }
        }

    # Handle zero support cases safely
    if support_1 == 0:
        logging.warning(f"No positive samples for {toxicity_type} in {lang}")
        return {
            "0": 1.0,
            "1": max_weight,
            "support_0": support_0,
            "support_1": support_1,
            "raw_weight_1": max_weight,
            "calculation_metadata": {
                "formula": "max_weight_no_positives",
                "constraints_applied": ["no_positives_fallback"]
            }
        }

    # Determine effective maximum weight based on class and language
    if lang == 'en' and toxicity_type == 'threat':
        effective_max = min(max_weight, 15.0)  # Absolute cap for EN threat
    elif toxicity_type == 'identity_hate':
        effective_max = min(max_weight, 10.0)  # Cap for identity hate
    else:
        effective_max = max_weight

    # Focal-loss intermediates are computed OUTSIDE the try block.
    # Fix: the metadata dict below always references pt, modulating_factor and
    # balanced_alpha; previously an exception inside the try left them
    # undefined and the fallback path crashed with NameError. These
    # expressions cannot raise here (total > eps, 0 < pt < 1 by construction).
    pt = support_1 / total + eps  # Probability of target class
    modulating_factor = (1 - pt) ** gamma
    balanced_alpha = alpha / (alpha + (1 - alpha) * (1 - pt))

    try:
        # Base weight calculation with focal loss
        raw_weight_1 = balanced_alpha * modulating_factor / (pt + eps)

        # Apply adaptive scaling for severe classes
        if toxicity_type in ['threat', 'identity_hate']:
            severity_factor = (1 + np.log1p(total) / np.log1p(support_1)) / 2
            raw_weight_1 *= severity_factor

        # Apply boost factor
        raw_weight_1 *= boost_factor

        # Detect potential numerical instability
        if not np.isfinite(raw_weight_1):
            logging.error(f"Numerical instability detected for {toxicity_type} in {lang}")
            raw_weight_1 = effective_max

    except Exception as e:
        # Defensive fallback: keep going with the capped weight.
        logging.error(f"Weight calculation error: {str(e)}")
        raw_weight_1 = effective_max

    # Apply safety limits with effective maximum
    weight_1 = min(effective_max, max(min_weight, raw_weight_1))
    weight_0 = 1.0  # Reference weight for majority class

    # Round weights for consistency and to prevent floating point issues
    weight_1 = round(float(weight_1), 3)
    weight_0 = round(float(weight_0), 3)

    return {
        "0": weight_0,
        "1": weight_1,
        "support_0": support_0,
        "support_1": support_1,
        "raw_weight_1": round(float(raw_weight_1), 3),
        "calculation_metadata": {
            "formula": "focal_loss_with_adaptive_scaling",
            "gamma": round(float(gamma), 3),
            "alpha": round(float(alpha), 3),
            "final_pt": round(float(pt), 4),
            "effective_max": round(float(effective_max), 3),
            "modulating_factor": round(float(modulating_factor), 4),
            "balanced_alpha": round(float(balanced_alpha), 4),
            "severity_adjusted": toxicity_type in ['threat', 'identity_hate'],
            "boost_factor": round(float(boost_factor), 3),
            "constraints_applied": [
                f"max_weight={effective_max}",
                f"boost={boost_factor}",
                f"numerical_stability=enforced",
                f"adaptive_scaling={'enabled' if toxicity_type in ['threat', 'identity_hate'] else 'disabled'}"
            ]
        }
    }
|
| 174 |
+
|
| 175 |
+
def get_language_specific_params(lang: str, toxicity_type: str) -> Dict:
    """
    Get language and class specific parameters for weight calculation.

    Starts from the shared defaults, layers any per-language / per-class
    overrides on top, and returns the merged dict after safety validation.
    """
    # Shared defaults applied to every language/class pair.
    default_params = {
        "max_weight": 15.0,
        "min_weight": 0.5,
        "boost_factor": 1.0,
        "gamma": 2.0,   # Default focal loss gamma
        "alpha": 0.25   # Default focal loss alpha
    }

    # Per-language, per-class overrides derived from dataset analysis.
    lang_adjustments = {
        "en": {
            "toxic": {
                "boost_factor": 1.67,  # To achieve ~3.5x weight
                "gamma": 2.5           # More focus on hard examples for main class
            },
            "threat": {
                "max_weight": 15.0,    # Absolute maximum cap
                "gamma": 3.0,          # Higher gamma for severe class
                "alpha": 0.3           # Slightly higher alpha for better recall
            },
            "identity_hate": {
                "max_weight": 5.0,     # Reduced from 8.4
                "gamma": 3.0,          # Higher gamma for severe class
                "alpha": 0.3           # Slightly higher alpha for better recall
            },
            "severe_toxic": {
                "max_weight": 3.9,     # Corrected weight
                "gamma": 2.5           # Moderate gamma for balance
            }
        },
        "tr": {
            "threat": {
                "max_weight": 12.8,    # Aligned with cross-lingual ratio
                "gamma": 2.8           # Slightly lower than EN for stability
            },
            "identity_hate": {
                "max_weight": 6.2,     # Adjusted for balance
                "gamma": 2.8           # Slightly lower than EN for stability
            }
        },
        "ru": {
            "threat": {
                "max_weight": 12.8,    # Aligned with cross-lingual ratio
                "gamma": 2.8           # Slightly lower than EN for stability
            },
            "identity_hate": {
                "max_weight": 7.0,     # Adjusted for balance
                "gamma": 2.8           # Slightly lower than EN for stability
            }
        },
        "fr": {
            "toxic": {
                "boost_factor": 1.2,   # To achieve ~2.2x weight
                "gamma": 2.2           # Lower gamma for better stability
            }
        }
    }

    # Unknown language or class simply falls back to the defaults.
    overrides = lang_adjustments.get(lang, {}).get(toxicity_type, {})
    merged = dict(default_params)
    merged.update(overrides)

    return validate_parameters(merged)
|
| 245 |
+
|
| 246 |
+
def check_cross_language_consistency(lang_weights: Dict) -> List[str]:
    """
    Check for consistency of weights across languages.

    Compares each non-English language's severe-class positive weights
    ('threat', 'identity_hate') against the English baseline and flags
    disparities outside the [0.67x, 1.5x] band.

    Args:
        lang_weights: Mapping of language -> class -> weight dict, where
            each weight dict carries the positive-class weight under key '1'.

    Returns:
        A list of warnings for significant disparities (also logged).
        Empty when there is no English baseline to compare against.
    """
    warnings = []

    # Fix: previously crashed with KeyError when the dataset had no English
    # split; without a baseline there is nothing to compare against.
    baseline = lang_weights.get('en')
    if baseline is None:
        logging.warning("No English baseline available; skipping cross-language consistency check")
        return warnings

    for lang in lang_weights:
        if lang == 'en':
            continue

        for cls in ['threat', 'identity_hate']:
            if cls in lang_weights[lang] and cls in baseline:
                base_weight = baseline[cls]['1']
                if base_weight == 0:
                    # Fix: guard against division by zero on degenerate weights.
                    continue
                ratio = lang_weights[lang][cls]['1'] / base_weight
                if ratio > 1.5 or ratio < 0.67:
                    warning = f"Large {cls} weight disparity: {lang} vs en ({ratio:.2f}x)"
                    warnings.append(warning)
                    logging.warning(warning)

    return warnings
|
| 267 |
+
|
| 268 |
+
def validate_dataset_balance(df: pd.DataFrame) -> bool:
    """
    Validate dataset balance across languages.

    Measures the coefficient of variation (std / mean) of per-language
    sample counts; a CV above 15% is treated as an imbalance. Logs the
    offending distribution and returns False in that case, True otherwise.
    """
    per_lang = df.groupby('lang').size()
    variation = per_lang.std() / per_lang.mean()

    if variation > 0.15:  # 15% threshold for coefficient of variation
        # Report the full distribution so the imbalance is actionable.
        logging.error(f"Dataset language imbalance exceeds 15% (CV={variation:.2%})")
        for lang, count in per_lang.items():
            logging.warning(f"{lang}: {count:,} samples ({count/len(df):.1%})")
        return False
    return True
|
| 282 |
+
|
| 283 |
+
def validate_weights(lang_weights: Dict) -> List[str]:
    """
    Ensure weights meet multilingual safety criteria.

    Walks every language/class weight entry and checks the positive/negative
    weight ratio plus the focal-loss parameters recorded in its metadata.

    Args:
        lang_weights: Dictionary of weights per language and class

    Returns:
        List of validation warnings

    Raises:
        ValueError: If weights violate safety constraints
    """
    warnings: List[str] = []

    for lang, classes in lang_weights.items():
        for cls, entry in classes.items():
            w1 = entry['1']
            w0 = entry['0']

            # Weight-ratio sanity: hard-fail above 30x, warn above 20x.
            ratio = w1 / w0
            if ratio > 30:
                raise ValueError(
                    f"Dangerous weight ratio {ratio:.1f}x for {lang} {cls}. "
                    f"Weight_1={w1:.3f}, Weight_0={w0:.3f}"
                )
            if ratio > 20:
                warnings.append(
                    f"High weight ratio {ratio:.1f}x for {lang} {cls}"
                )

            # Focal parameter boundaries, read from the recorded metadata.
            meta = entry['calculation_metadata']
            gamma = meta.get('gamma', 0.0)
            alpha = meta.get('alpha', 0.0)

            if gamma > 5.0:
                raise ValueError(
                    f"Unsafe gamma={gamma:.1f} for {lang} {cls}. "
                    f"Must be <= 5.0"
                )
            if gamma > 4.0:
                warnings.append(
                    f"High gamma={gamma:.1f} for {lang} {cls}"
                )

            if alpha > 0.9:
                raise ValueError(
                    f"Unsafe alpha={alpha:.2f} for {lang} {cls}. "
                    f"Must be < 0.9"
                )
            if alpha > 0.7:
                warnings.append(
                    f"High alpha={alpha:.2f} for {lang} {cls}"
                )

            # Combined risk: aggressive focusing on top of a high ratio.
            if gamma > 3.0 and ratio > 15:
                warnings.append(
                    f"Risky combination for {lang} {cls}: "
                    f"gamma={gamma:.1f}, ratio={ratio:.1f}x"
                )

    return warnings
|
| 349 |
+
|
| 350 |
+
def compute_language_weights(df: pd.DataFrame) -> Dict:
    """
    Compute weights with inter-language normalization to ensure consistent
    weighting across languages while preserving relative class relationships.

    Two passes: (1) per-language focal-loss weights via
    calculate_safe_weights with get_language_specific_params; (2) each
    toxicity class rescaled so its cross-language maximum equals 15.0.
    Runs validate_weights and check_cross_language_consistency before
    returning; warnings are logged, not raised.

    Args:
        df: DataFrame with a 'lang' column and binary toxicity columns.

    Returns:
        Nested dict: language -> toxicity class -> weight entry (keys
        '0', '1', supports, 'raw_weight_1', 'calculation_metadata').
    """
    # Validate dataset balance first (non-fatal; weights may need manual review)
    if not validate_dataset_balance(df):
        logging.warning("Proceeding with imbalanced dataset - weights may need manual adjustment")

    lang_weights = {}
    toxicity_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # First pass: calculate raw weights for each language and class
    logging.info("\nFirst pass: Calculating raw weights")
    for lang in df['lang'].unique():
        logging.info(f"\nProcessing language: {lang}")
        lang_df = df[df['lang'] == lang]
        lang_weights[lang] = {}

        for col in toxicity_columns:
            # Binary support counts for this language/class pair
            y = lang_df[col].values.astype(np.int32)
            support_0 = int((y == 0).sum())
            support_1 = int((y == 1).sum())

            params = get_language_specific_params(lang, col)
            weights = calculate_safe_weights(
                support_0=support_0,
                support_1=support_1,
                max_weight=params['max_weight'],
                min_weight=params['min_weight'],
                gamma=params['gamma'],
                alpha=params['alpha'],
                boost_factor=params['boost_factor'],
                lang=lang,
                toxicity_type=col
            )
            lang_weights[lang][col] = weights

            # Log initial weights
            logging.info(f"  {col} - Initial weights:")
            logging.info(f"    Class 0: {weights['0']:.3f}, samples: {support_0:,}")
            logging.info(f"    Class 1: {weights['1']:.3f}, samples: {support_1:,}")

    # Second pass: normalize weights across languages.
    # NOTE(review): this rescales EVERY class so its cross-language maximum
    # becomes 15.0, which can inflate small first-pass weights well above
    # their per-class max_weight caps — confirm this is intended.
    logging.info("\nSecond pass: Normalizing weights across languages")
    for col in toxicity_columns:
        # Find maximum weight for this toxicity type across all languages
        max_weight = max(
            lang_weights[lang][col]['1']
            for lang in lang_weights
        )

        if max_weight > 0:  # Prevent division by zero
            logging.info(f"\nNormalizing {col}:")
            logging.info(f"  Maximum weight across languages: {max_weight:.3f}")

            # Normalize weights for each language
            for lang in lang_weights:
                original_weight = lang_weights[lang][col]['1']

                # Normalize and rescale
                normalized_weight = (original_weight / max_weight) * 15.0

                # Update weight while preserving metadata (mutates in place;
                # 'raw_weight_1' now records the pre-normalization value)
                lang_weights[lang][col]['raw_weight_1'] = original_weight
                lang_weights[lang][col]['1'] = round(normalized_weight, 3)

                # Add normalization info to metadata
                lang_weights[lang][col]['calculation_metadata'].update({
                    'normalization': {
                        'original_weight': round(float(original_weight), 3),
                        'max_weight_across_langs': round(float(max_weight), 3),
                        'normalization_factor': round(float(15.0 / max_weight), 3)
                    }
                })

                # Log normalization results
                logging.info(f"  {lang}: {original_weight:.3f} → {normalized_weight:.3f}")

    # Validate final weights
    logging.info("\nValidating final weights:")
    for col in toxicity_columns:
        weights_range = [
            lang_weights[lang][col]['1']
            for lang in lang_weights
        ]
        logging.info(f"  {col}: range [{min(weights_range):.3f}, {max(weights_range):.3f}]")

    # Validate weights meet safety criteria (may raise ValueError)
    validation_warnings = validate_weights(lang_weights)
    if validation_warnings:
        logging.warning("\nWeight validation warnings:")
        for warning in validation_warnings:
            logging.warning(f"  {warning}")

    # Check cross-language consistency
    consistency_warnings = check_cross_language_consistency(lang_weights)
    if consistency_warnings:
        logging.warning("\nCross-language consistency warnings:")
        for warning in consistency_warnings:
            logging.warning(f"  {warning}")

    return lang_weights
|
| 453 |
+
|
| 454 |
+
def main():
    """Load the augmented dataset, compute per-language class weights, and
    write them (with run metadata) to weights/language_class_weights.json.

    Assumes the 'weights' output directory already exists — TODO confirm,
    otherwise the open() below raises FileNotFoundError.
    """
    # Load dataset
    input_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_AUGMENTED.csv'
    logging.info(f"Loading dataset from {input_file}")
    df = pd.read_csv(input_file)

    # Compute weights
    lang_weights = compute_language_weights(df)

    # Add metadata describing the run alongside the weights themselves
    weights_data = {
        "metadata": {
            "total_samples": len(df),
            "language_distribution": df['lang'].value_counts().to_dict(),
            "weight_calculation": {
                "method": "focal_loss_with_adaptive_scaling",
                "parameters": {
                    "default_max_weight": 15.0,
                    "default_min_weight": 0.5,
                    "language_specific_adjustments": True
                }
            }
        },
        "weights": lang_weights
    }

    # Save weights as pretty-printed UTF-8 JSON
    output_file = 'weights/language_class_weights.json'
    logging.info(f"\nSaving weights to {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(weights_data, f, indent=2, ensure_ascii=False)

    logging.info("\nWeight calculation complete!")

    # Print summary statistics: only classes whose normalized weight differs
    # from the raw first-pass weight are reported
    logging.info("\nSummary of adjustments made:")
    for lang in lang_weights:
        for col in ['threat', 'identity_hate']:
            if col in lang_weights[lang]:
                weight = lang_weights[lang][col]['1']
                raw = lang_weights[lang][col]['raw_weight_1']
                if raw != weight:
                    logging.info(f"{lang} {col}: Adjusted from {raw:.2f}× to {weight:.2f}×")

if __name__ == "__main__":
    main()
|
analysis/plot_loss_curves.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import torch
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import numpy as np
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import logging
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from torch.utils.data import DataLoader
|
| 9 |
+
import sys
|
| 10 |
+
import os
|
| 11 |
+
import wandb
|
| 12 |
+
from transformers import get_linear_schedule_with_warmup
|
| 13 |
+
|
| 14 |
+
# Add project root to path
|
| 15 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 16 |
+
|
| 17 |
+
from model.training_config import TrainingConfig
|
| 18 |
+
from model.language_aware_transformer import LanguageAwareTransformer
|
| 19 |
+
from model.train import ToxicDataset
|
| 20 |
+
from transformers import XLMRobertaTokenizer
|
| 21 |
+
|
| 22 |
+
# Set up logging: INFO level with timestamped records; all functions in this
# script log through the module-level `logger` below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
def setup_plot_style():
    """Configure global matplotlib styling for the analysis plots.

    Applies a seaborn-dark-grid style sheet (if available) and sets the
    default figure size and font size.
    """
    # 'seaborn-darkgrid' was renamed 'seaborn-v0_8-darkgrid' in matplotlib 3.6
    # and the old alias removed; plt.style.use() on the old name now raises.
    # Pick whichever name this matplotlib install provides.
    for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    plt.rcParams['figure.figsize'] = (12, 12)
    plt.rcParams['font.size'] = 12
|
| 34 |
+
|
| 35 |
+
def setup_wandb():
    """Start a Weights & Biases run for this validation-analysis session.

    Raises:
        Exception: re-raised after logging if wandb initialization fails.
    """
    try:
        wandb.init(
            project="toxic-comment-classification",
            name=f"validation-analysis-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
            config={
                "analysis_type": "validation_loss",
                "timestamp": datetime.now().strftime('%Y%m%d-%H%M%S'),
            },
        )
        logger.info("Initialized wandb logging")
    except Exception as e:
        logger.error(f"Error initializing wandb: {str(e)}")
        raise
|
| 50 |
+
|
| 51 |
+
def load_model_and_data():
    """Load the model and validation data.

    Builds a TrainingConfig mirroring the training hyperparameters, loads the
    combined val+test split into one shuffled DataLoader, restores the model
    from the saved checkpoint, and constructs optimizer/scheduler/scaler so
    the validation loop can report learning rates consistently with training.

    Returns:
        tuple: (model, combined_loader, device, optimizer, scheduler, scaler, config)

    Raises:
        FileNotFoundError: if the model checkpoint is missing.
        Exception: any other load failure, logged and re-raised.
    """
    try:
        # Initialize config with training settings
        config = TrainingConfig(
            batch_size=16,
            num_workers=16,
            lr=2e-5,
            weight_decay=0.01,
            max_grad_norm=1.0,
            warmup_ratio=0.1,
            label_smoothing=0.01,

            mixed_precision="fp16",
            activation_checkpointing=True,
            epochs=1  # Number of validation epochs

        )

        # Load validation data: val and test are concatenated into a single
        # analysis set (this script never trains, so no leakage concern here).
        logger.info("Loading validation and test data...")
        val_df = pd.read_csv("dataset/split/val.csv")
        test_df = pd.read_csv("dataset/split/test.csv")
        combined_df = pd.concat([val_df, test_df])
        tokenizer = XLMRobertaTokenizer.from_pretrained(config.model_name)
        combined_dataset = ToxicDataset(combined_df, tokenizer, config, mode='combined')


        # Create combined dataloader
        combined_loader = DataLoader(
            combined_dataset,
            batch_size=config.batch_size,
            shuffle=True,  # Enable shuffling
            num_workers=config.num_workers,
            pin_memory=True,
            drop_last=False  # Keep all samples
        )

        # Log dataloader config to wandb
        if wandb.run is not None:
            wandb.config.update({
                'shuffle': True,
                'drop_last': False,
                'total_validation_steps': len(combined_loader),
                'total_validation_samples': len(combined_dataset)
            })


        # Load model
        logger.info("Loading model...")
        model = LanguageAwareTransformer(
            num_labels=len(config.toxicity_labels),
            model_name=config.model_name
        )

        # Load latest checkpoint (state dict only; map to CPU first so this
        # works regardless of which device the checkpoint was saved from)
        checkpoint_path = Path('weights/toxic_classifier_xlm-roberta-large/pytorch_model.bin')
        if checkpoint_path.exists():
            checkpoint = torch.load(checkpoint_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            logger.info("Loaded model checkpoint")
        else:
            raise FileNotFoundError("No checkpoint found")

        # Move model to GPU if available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)

        # Setup optimizer (per-group hyperparameters come from the config)
        param_groups = config.get_param_groups(model)
        optimizer = torch.optim.AdamW(param_groups)

        # Setup scheduler with linear warmup over warmup_ratio of total steps
        total_steps = len(combined_loader) * config.epochs
        warmup_steps = int(total_steps * config.warmup_ratio)

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

        # Initialize gradient scaler for mixed precision
        scaler = torch.cuda.amp.GradScaler(enabled=config.mixed_precision == "fp16")

        # Log model configuration to wandb
        if wandb.run is not None:
            wandb.config.update({
                'model_name': config.model_name,
                'batch_size': config.batch_size,
                'learning_rate': config.lr,
                'weight_decay': config.weight_decay,
                'max_grad_norm': config.max_grad_norm,
                'warmup_ratio': config.warmup_ratio,
                'label_smoothing': config.label_smoothing,
                'mixed_precision': config.mixed_precision,
                'num_workers': config.num_workers,
                'activation_checkpointing': config.activation_checkpointing,
                'validation_epochs': config.epochs
            })

        return model, combined_loader, device, optimizer, scheduler, scaler, config


    except Exception as e:
        logger.error(f"Error loading model and data: {str(e)}")
        raise
|
| 158 |
+
|
| 159 |
+
def collect_validation_losses(model, combined_loader, device, optimizer, scheduler, scaler, config):
    """Run validation and collect per-epoch average losses.

    The model is frozen (eval mode, requires_grad=False, torch.no_grad) —
    this is purely an analysis pass over the combined val+test data; nothing
    is saved or updated. Step losses and running averages are streamed to
    wandb; `scheduler` and `scaler` are accepted for signature parity with
    the training loop but are not stepped here.

    Args:
        model: the loaded LanguageAwareTransformer.
        combined_loader: DataLoader over the combined val+test split.
        device: torch device the batches are moved to.
        optimizer: used only to read current learning rates for logging.
        scheduler, scaler: unused during this read-only pass.
        config: TrainingConfig supplying epochs / mixed_precision / batch_size.

    Returns:
        list[dict]: one entry per epoch with 'epoch', 'avg_loss',
        'elapsed_time' (seconds).
    """
    try:
        logger.warning("This is an analysis run on combined val+test data - model will not be saved or updated")
        # Ensure we're in eval mode and no gradients are computed
        model.eval()
        for param in model.parameters():
            param.requires_grad = False

        epoch_losses = []

        for epoch in range(config.epochs):
            logger.info(f"\nStarting validation epoch {epoch+1}/{config.epochs}")
            total_loss = 0
            num_batches = len(combined_loader)
            epoch_start_time = datetime.now()

            with torch.no_grad():  # Extra safety to ensure no gradients
                for step, batch in enumerate(combined_loader):
                    # Move batch to device (language ids etc. may be non-tensors)
                    batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                             for k, v in batch.items()}

                    # Forward pass with mixed precision
                    with torch.cuda.amp.autocast(enabled=config.mixed_precision != "no"):
                        outputs = model(**batch)
                        loss = outputs['loss'].item()

                    total_loss += loss

                    # Calculate running averages
                    avg_loss = total_loss / (step + 1)

                    # Get learning rates (read-only; optimizer never steps here)
                    lrs = [group['lr'] for group in optimizer.param_groups]

                    # Log to wandb
                    wandb.log({
                        'val/step_loss': loss,
                        'val/running_avg_loss': avg_loss,
                        'val/progress': (step + 1) / num_batches * 100,
                        'val/learning_rate': lrs[0],  # Base learning rate
                        'val/batch_size': config.batch_size,
                        'val/epoch': epoch + 1,
                        'val/global_step': epoch * num_batches + step
                    })

                    # Log progress every 10 steps with an ETA estimate
                    if step % 10 == 0:
                        elapsed_time = datetime.now() - epoch_start_time
                        steps_per_sec = (step + 1) / elapsed_time.total_seconds()
                        remaining_steps = num_batches - (step + 1)
                        eta_seconds = remaining_steps / steps_per_sec if steps_per_sec > 0 else 0

                        logger.info(
                            f"Epoch [{epoch+1}/{config.epochs}] "
                            f"Step [{step+1}/{num_batches}] "
                            f"Loss: {loss:.4f} "
                            f"Avg Loss: {avg_loss:.4f} "
                            f"LR: {lrs[0]:.2e} "
                            f"ETA: {int(eta_seconds)}s"
                        )

            # Calculate epoch statistics (guard against an empty dataloader,
            # which would otherwise raise ZeroDivisionError)
            epoch_avg_loss = total_loss / num_batches if num_batches else 0.0
            epoch_losses.append({
                'epoch': epoch + 1,
                'avg_loss': epoch_avg_loss,
                'elapsed_time': (datetime.now() - epoch_start_time).total_seconds()
            })

            # Log epoch metrics to wandb
            wandb.log({
                'val/epoch_avg_loss': epoch_avg_loss,
                'val/epoch_number': epoch + 1,
                'val/epoch_time': epoch_losses[-1]['elapsed_time']
            })

            # Clear GPU memory after each epoch
            torch.cuda.empty_cache()

        return epoch_losses

    except Exception as e:
        logger.error(f"Error collecting validation losses: {str(e)}")
        raise
|
| 246 |
+
|
| 247 |
+
def plot_validation_losses(epoch_losses):
    """Plot validation epoch losses, save the figure, and log it to wandb.

    Args:
        epoch_losses: list of dicts with 'epoch' and 'avg_loss' keys, as
            returned by collect_validation_losses().

    The plot is written to analysis/plots/validation_losses_<timestamp>.png.
    """
    try:
        setup_plot_style()

        # Create figure
        fig, ax = plt.subplots()

        # Extract data
        epochs = [d['epoch'] for d in epoch_losses]
        losses = [d['avg_loss'] for d in epoch_losses]

        # Plot epoch losses
        ax.plot(epochs, losses, 'go-', label='Epoch Average Loss', linewidth=2, markersize=8)

        # Add trend line — only meaningful with >= 2 points; np.polyfit of
        # degree 1 on a single epoch is under-determined and warns/garbage.
        if len(epochs) >= 2:
            z = np.polyfit(epochs, losses, 1)
            p = np.poly1d(z)
            ax.plot(epochs, p(epochs), "r--", alpha=0.8, label='Loss Trend')

        # Customize plot
        ax.set_title('Validation Epoch Losses')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Average Loss')
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)

        # Adjust layout
        plt.tight_layout()

        # Create output directory if it doesn't exist
        output_dir = Path('analysis/plots')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save plot with a timestamped filename so runs don't overwrite
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_path = output_dir / f'validation_losses_{timestamp}.png'
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        logger.info(f"Plot saved to {output_path}")

        # Log plot to wandb
        wandb.log({
            "val/loss_plot": wandb.Image(str(output_path))
        })

        # Show plot (no-op on headless backends)
        plt.show()

    except Exception as e:
        logger.error(f"Error plotting validation losses: {str(e)}")
        raise
|
| 298 |
+
|
| 299 |
+
def calculate_loss_statistics(epoch_losses):
    """Summarize per-epoch validation losses.

    Computes mean/std/min/max loss and the best epoch, mirrors them to
    wandb, prints a human-readable summary, and returns the stats dict.
    """
    try:
        loss_values = [entry['avg_loss'] for entry in epoch_losses]
        best_entry = epoch_losses[np.argmin(loss_values)]

        stats = {
            'Mean Loss': np.mean(loss_values),
            'Std Loss': np.std(loss_values),
            'Min Loss': np.min(loss_values),
            'Max Loss': np.max(loss_values),
            'Best Epoch': best_entry['epoch'],
        }

        # Mirror every statistic into the wandb run
        wandb.log({
            'val/mean_loss': stats['Mean Loss'],
            'val/std_loss': stats['Std Loss'],
            'val/min_loss': stats['Min Loss'],
            'val/max_loss': stats['Max Loss'],
            'val/best_epoch': stats['Best Epoch'],
        })

        # Human-readable console summary (epoch index printed as an int)
        print("\nValidation Loss Statistics:")
        for metric_name, value in stats.items():
            if metric_name == 'Best Epoch':
                print(f"{metric_name}: {int(value)}")
            else:
                print(f"{metric_name}: {value:.4f}")

        return stats

    except Exception as e:
        logger.error(f"Error calculating statistics: {str(e)}")
        raise
|
| 334 |
+
|
| 335 |
+
def main():
    """Drive the validation-loss analysis end to end.

    Pipeline: init wandb -> load model/data -> run the frozen validation
    pass -> plot epoch losses -> print summary statistics. GPU memory and
    the wandb run are cleaned up even on failure.
    """
    try:
        # Initialize wandb
        setup_wandb()

        # Load model and data
        logger.info("Loading model and data...")
        (model, combined_loader, device,
         optimizer, scheduler, scaler, config) = load_model_and_data()

        # Collect validation losses
        logger.info("Collecting validation losses...")
        epoch_losses = collect_validation_losses(
            model, combined_loader, device, optimizer, scheduler, scaler, config
        )

        # Plot losses
        logger.info("Plotting validation losses...")
        plot_validation_losses(epoch_losses)

        # Calculate and print statistics
        logger.info("Calculating statistics...")
        calculate_loss_statistics(epoch_losses)

    except Exception as e:
        logger.error(f"Error in main: {str(e)}")
        raise
    finally:
        # Clean up GPU memory and close the wandb run regardless of outcome
        torch.cuda.empty_cache()
        wandb.finish()
|
| 368 |
+
|
| 369 |
+
# Script entry point. main() already logs its own failures; this wrapper adds
# a final "Script failed" record and preserves the non-zero exit status.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logger.error(f"Script failed: {str(e)}")
        raise
|
analysis/plot_roc_curves.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
from sklearn.metrics import roc_curve, auc
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
def plot_roc_curves(predictions_path, output_dir=None):
    """
    Plot ROC curves from model predictions.

    Generates three kinds of plots under <output_dir>/plots:
      1. roc_all_classes.png   - one curve per toxicity class
      2. roc_<class>.png       - per-class curve with bootstrap ±1 std band
      3. roc_by_language.png   - per-language curves for the 'toxic' class

    Args:
        predictions_path (str): Path to the .npz file containing predictions
        output_dir (str, optional): Directory to save plots. If None, will use same directory as predictions
    """
    # Load predictions. The .npz is expected to hold parallel arrays:
    # predictions/labels of shape (n_samples, n_classes) and integer langs.
    data = np.load(predictions_path)
    predictions = data['predictions']
    labels = data['labels']
    langs = data['langs']

    # Create output directory
    if output_dir is None:
        output_dir = os.path.dirname(predictions_path)
    plots_dir = os.path.join(output_dir, 'plots')
    os.makedirs(plots_dir, exist_ok=True)

    # Define toxicity types (column order must match the saved arrays)
    toxicity_types = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Define language mapping from the integer ids stored in the .npz
    id_to_lang = {
        0: 'English (en)',
        1: 'Russian (ru)',
        2: 'Turkish (tr)',
        3: 'Spanish (es)',
        4: 'French (fr)',
        5: 'Italian (it)',
        6: 'Portuguese (pt)'
    }

    # Plot overall ROC curves (one per class)
    plt.figure(figsize=(10, 8))
    for i, class_name in enumerate(toxicity_types):
        fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, label=f'{class_name} (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', label='Random')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves - All Classes')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, 'roc_all_classes.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # Plot per-class ROC curves with bootstrap confidence intervals
    n_bootstrap = 1000

    for i, class_name in enumerate(toxicity_types):
        plt.figure(figsize=(8, 6))

        # Calculate main ROC curve
        fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
        roc_auc = auc(fpr, tpr)

        # Plot main curve
        plt.plot(fpr, tpr, 'b-', label=f'ROC (AUC = {roc_auc:.3f})')

        # Bootstrap for confidence intervals: resample with replacement and
        # interpolate each TPR onto a common FPR grid so they can be averaged
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0, 1, 100)

        for _ in range(n_bootstrap):
            # Bootstrap sample indices
            indices = np.random.randint(0, len(labels), len(labels))
            # ROC is undefined if a resample contains only one class
            if len(np.unique(labels[indices, i])) < 2:
                continue

            # Calculate ROC curve
            fpr, tpr, _ = roc_curve(labels[indices, i], predictions[indices, i])

            # Interpolate TPR at mean FPR points
            interp_tpr = np.interp(mean_fpr, fpr, tpr)
            interp_tpr[0] = 0.0
            tprs.append(interp_tpr)
            aucs.append(auc(fpr, tpr))

        # Only draw the band if at least one resample was usable; for a very
        # rare class every resample can be single-class, and np.mean over an
        # empty array would otherwise crash/NaN.
        if tprs:
            tprs = np.array(tprs)
            mean_tpr = np.mean(tprs, axis=0)
            std_tpr = np.std(tprs, axis=0)

            tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
            tprs_lower = np.maximum(mean_tpr - std_tpr, 0)

            # Plot confidence interval
            plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                             label='±1 std. dev.')

            # Calculate AUC confidence interval
            auc_mean = np.mean(aucs)
            auc_std = np.std(aucs)
            plt.plot([], [], ' ', label=f'AUC = {auc_mean:.3f} ± {auc_std:.3f}')

        plt.plot([0, 1], [0, 1], 'k--', label='Random')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {class_name}')
        plt.legend(loc='lower right')
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, f'roc_{class_name}.png'), dpi=300)
        plt.close()

    # Plot per-language ROC curves (for toxic class)
    plt.figure(figsize=(10, 8))
    for lang_id, lang_name in id_to_lang.items():
        # Get samples for this language; skip languages with no samples or
        # only one label value (ROC undefined)
        lang_mask = langs == lang_id
        if lang_mask.sum() > 0 and len(np.unique(labels[lang_mask, 0])) > 1:
            fpr, tpr, _ = roc_curve(labels[lang_mask, 0], predictions[lang_mask, 0])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr, label=f'{lang_name} (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', label='Random')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves by Language - Toxic Class')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, 'roc_by_language.png'), dpi=300, bbox_inches='tight')
    plt.close()

    print(f"\nROC curves have been saved to {plots_dir}")
    print("\nGenerated plots:")
    print("1. roc_all_classes.png - ROC curves for all toxicity classes")
    print("2. roc_[class_name].png - Individual ROC curves with confidence intervals for each class")
    print("3. roc_by_language.png - ROC curves for each language (toxic class)")
|
| 146 |
+
|
| 147 |
+
if __name__ == '__main__':
    # Use the latest evaluation results
    eval_dir = 'evaluation_results'
    if os.path.exists(eval_dir):
        # Find most recent evaluation directory: names embed a timestamp
        # (eval_YYYYMMDD_HHMMSS), so reverse lexicographic sort puts the
        # newest run first.
        eval_dirs = sorted([d for d in os.listdir(eval_dir) if d.startswith('eval_')], reverse=True)
        if eval_dirs:
            latest_eval = os.path.join(eval_dir, eval_dirs[0])
            predictions_path = os.path.join(latest_eval, 'predictions.npz')
            if os.path.exists(predictions_path):
                plot_roc_curves(predictions_path)
            else:
                print(f"No predictions file found in {latest_eval}")
        else:
            print(f"No evaluation directories found in {eval_dir}")
    else:
        print(f"Evaluation directory {eval_dir} not found")
|
app.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
from model.inference_optimized import OptimizedToxicityClassifier
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
from typing import List, Dict
|
| 9 |
+
import langid
|
| 10 |
+
import pandas as pd
|
| 11 |
+
|
| 12 |
+
# Configure paths (overridable via environment for deployment flexibility)
ONNX_MODEL_PATH = os.environ.get("ONNX_MODEL_PATH", "weights/toxic_classifier.onnx")
PYTORCH_MODEL_PATH = os.environ.get("PYTORCH_MODEL_PATH", "weights/toxic_classifier_xlm-roberta-large/pytorch_model.bin")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Supported languages: ISO code -> display name shown in the UI dropdown
SUPPORTED_LANGUAGES = {
    'en': 'English',
    'ru': 'Russian',
    'tr': 'Turkish',
    'es': 'Spanish',
    'fr': 'French',
    'it': 'Italian',
    'pt': 'Portuguese'
}

# Initialize classifier at import time. Prefer the ONNX export when present,
# otherwise fall back to the PyTorch checkpoint. On failure the app still
# starts with classifier=None and predict_toxicity() reports the error.
try:
    if os.path.exists(ONNX_MODEL_PATH):
        classifier = OptimizedToxicityClassifier(onnx_path=ONNX_MODEL_PATH, device=DEVICE)
        print(f"Loaded ONNX model from {ONNX_MODEL_PATH}")
    else:
        classifier = OptimizedToxicityClassifier(pytorch_path=PYTORCH_MODEL_PATH, device=DEVICE)
        print(f"Loaded PyTorch model from {PYTORCH_MODEL_PATH}")
except Exception as e:
    print(f"Error loading model: {str(e)}")
    classifier = None
|
| 39 |
+
|
| 40 |
+
def detect_language(text: str) -> str:
    """Detect the language of *text* with langid.

    Returns the detected ISO code when it is one of SUPPORTED_LANGUAGES,
    otherwise falls back to 'en' (including on any detection failure).
    """
    try:
        lang, _ = langid.classify(text)
        return lang if lang in SUPPORTED_LANGUAGES else 'en'
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any detection error still yields 'en'.
        return 'en'
|
| 47 |
+
|
| 48 |
+
def predict_toxicity(text: str, selected_language: str = None) -> Dict:
    """Predict toxicity of input text.

    Runs the module-level `classifier` on one text, then builds the three
    artifacts the Gradio UI consumes: the raw prediction dict, an HTML
    results table, and a matplotlib bar chart of per-category probabilities.

    Args:
        text: the comment to analyze.
        selected_language: display name from SUPPORTED_LANGUAGES, or
            None/"Auto-detect" to run language detection.

    Returns:
        dict with keys 'prediction', 'html_result', 'fig' on success, or
        'error' and 'html_result' on failure / empty input / missing model.
    """
    # Guard: empty or whitespace-only input
    if not text or not text.strip():
        return {
            "error": "Please enter some text to analyze.",
            "html_result": "<div class='error'>Please enter some text to analyze.</div>"
        }

    # Guard: model failed to load at startup (classifier is None)
    if classifier is None:
        return {
            "error": "Model not loaded. Please check logs.",
            "html_result": "<div class='error'>Model not loaded. Please check logs.</div>"
        }

    # Detect language if not specified
    if not selected_language or selected_language == "Auto-detect":
        lang_code = detect_language(text)
        detected = True
    else:
        # Convert from display name to code; unknown names fall back to 'en'
        lang_code = next((code for code, name in SUPPORTED_LANGUAGES.items()
                        if name == selected_language), 'en')
        detected = False

    # Run prediction
    try:
        results = classifier.predict([text], langs=[lang_code])[0]

        # Format probabilities for display, highest probability first
        probs = results["probabilities"]
        sorted_categories = sorted(
            [(label, probs[label]) for label in probs],
            key=lambda x: x[1],
            reverse=True
        )

        # Create bar chart (red above the 50% decision threshold, blue below)
        fig, ax = plt.subplots(figsize=(10, 6))
        labels = [label.replace('_', ' ').title() for label, _ in sorted_categories]
        values = [prob * 100 for _, prob in sorted_categories]
        colors = ['#ff6b6b' if val >= 50 else '#74c0fc' for val in values]

        ax.barh(labels, values, color=colors)
        ax.set_xlim(0, 100)
        ax.set_xlabel('Probability (%)')
        ax.set_title('Toxicity Analysis')
        ax.grid(axis='x', linestyle='--', alpha=0.7)

        # Annotate values just to the right of each bar
        for i, v in enumerate(values):
            ax.text(v + 1, i, f'{v:.1f}%', va='center')

        # Create HTML result: headline verdict plus per-category table
        lang_display = SUPPORTED_LANGUAGES.get(lang_code, lang_code)
        overall_result = "TOXIC" if results["is_toxic"] else "NON-TOXIC"
        result_color = "#ff6b6b" if results["is_toxic"] else "#66d9e8"

        html_result = f"""
        <div style='margin-bottom: 20px;'>
            <h2>Analysis Result: <span style='color: {result_color};'>{overall_result}</span></h2>
            <h3>Language: {lang_display} {'(detected)' if detected else ''}</h3>
        </div>
        <div style='margin-bottom: 10px;'>
            <table width='100%' style='border-collapse: collapse;'>
                <tr style='background-color: #e9ecef; font-weight: bold;'>
                    <th style='padding: 8px; text-align: left; border: 1px solid #dee2e6;'>Category</th>
                    <th style='padding: 8px; text-align: right; border: 1px solid #dee2e6;'>Probability</th>
                    <th style='padding: 8px; text-align: center; border: 1px solid #dee2e6;'>Status</th>
                </tr>
        """

        # Add rows for each toxicity category (0.5 is the decision threshold)
        for label, prob in sorted_categories:
            formatted_label = label.replace('_', ' ').title()
            status = "DETECTED" if prob >= 0.5 else "Not Detected"
            status_color = "#ff6b6b" if prob >= 0.5 else "#66d9e8"
            prob_percent = f"{prob * 100:.1f}%"

            html_result += f"""
                <tr>
                    <td style='padding: 8px; border: 1px solid #dee2e6;'>{formatted_label}</td>
                    <td style='padding: 8px; text-align: right; border: 1px solid #dee2e6;'>{prob_percent}</td>
                    <td style='padding: 8px; text-align: center; border: 1px solid #dee2e6; color: {status_color}; font-weight: bold;'>{status}</td>
                </tr>
            """

        html_result += "</table></div>"

        # Add detected categories if toxic
        if results["is_toxic"]:
            toxic_categories = [cat.replace('_', ' ').title() for cat in results["toxic_categories"]]
            categories_list = ", ".join(toxic_categories)
            html_result += f"""
            <div style='margin-top: 10px;'>
                <p><strong>Detected toxic categories:</strong> {categories_list}</p>
            </div>
            """

        return {
            "prediction": results,
            "html_result": html_result,
            "fig": fig
        }

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app
        import traceback
        traceback.print_exc()
        return {
            "error": f"Error processing text: {str(e)}",
            "html_result": f"<div class='error'>Error processing text: {str(e)}</div>"
        }
|
| 159 |
+
|
| 160 |
+
def create_app():
|
| 161 |
+
"""Create and configure the Gradio interface"""
|
| 162 |
+
# Create language dropdown options
|
| 163 |
+
language_options = ["Auto-detect"] + list(SUPPORTED_LANGUAGES.values())
|
| 164 |
+
|
| 165 |
+
# Define the interface
|
| 166 |
+
with gr.Blocks(css="""
|
| 167 |
+
.error { color: #ff6b6b; font-weight: bold; padding: 10px; border: 1px solid #ff6b6b; }
|
| 168 |
+
.container { margin: 0 auto; max-width: 900px; }
|
| 169 |
+
.gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
|
| 170 |
+
.example-text { font-style: italic; color: #666; }
|
| 171 |
+
""") as app:
|
| 172 |
+
gr.Markdown("""
|
| 173 |
+
# Multilingual Toxic Comment Classifier
|
| 174 |
+
This app analyzes text for different types of toxicity across multiple languages.
|
| 175 |
+
Enter your text, select a language (or let it auto-detect), and click 'Analyze'.
|
| 176 |
+
|
| 177 |
+
Supported languages: English, Russian, Turkish, Spanish, French, Italian, Portuguese
|
| 178 |
+
""")
|
| 179 |
+
|
| 180 |
+
with gr.Row():
|
| 181 |
+
with gr.Column(scale=3):
|
| 182 |
+
text_input = gr.Textbox(
|
| 183 |
+
label="Enter text to analyze",
|
| 184 |
+
placeholder="Type or paste text here...",
|
| 185 |
+
lines=5
|
| 186 |
+
)
|
| 187 |
+
lang_dropdown = gr.Dropdown(
|
| 188 |
+
choices=language_options,
|
| 189 |
+
value="Auto-detect",
|
| 190 |
+
label="Language"
|
| 191 |
+
)
|
| 192 |
+
analyze_btn = gr.Button("Analyze", variant="primary")
|
| 193 |
+
|
| 194 |
+
with gr.Column(scale=2):
|
| 195 |
+
gr.Markdown("### Example texts:")
|
| 196 |
+
with gr.Accordion("English example"):
|
| 197 |
+
en_example_btn = gr.Button("Use English example")
|
| 198 |
+
with gr.Accordion("Spanish example"):
|
| 199 |
+
es_example_btn = gr.Button("Use Spanish example")
|
| 200 |
+
with gr.Accordion("French example"):
|
| 201 |
+
fr_example_btn = gr.Button("Use French example")
|
| 202 |
+
|
| 203 |
+
# Examples
|
| 204 |
+
en_example_text = "You are such an idiot, nobody likes your stupid content."
|
| 205 |
+
es_example_text = "Eres un completo idiota y nadie te quiere."
|
| 206 |
+
fr_example_text = "Tu es tellement stupide, personne n'aime ton contenu minable."
|
| 207 |
+
|
| 208 |
+
en_example_btn.click(
|
| 209 |
+
lambda: en_example_text,
|
| 210 |
+
outputs=text_input
|
| 211 |
+
)
|
| 212 |
+
es_example_btn.click(
|
| 213 |
+
lambda: es_example_text,
|
| 214 |
+
outputs=text_input
|
| 215 |
+
)
|
| 216 |
+
fr_example_btn.click(
|
| 217 |
+
lambda: fr_example_text,
|
| 218 |
+
outputs=text_input
|
| 219 |
+
)
|
| 220 |
+
|
| 221 |
+
# Output components
|
| 222 |
+
result_html = gr.HTML(label="Analysis Result")
|
| 223 |
+
plot_output = gr.Plot(label="Toxicity Probabilities")
|
| 224 |
+
|
| 225 |
+
# Set up event handling
|
| 226 |
+
analyze_btn.click(
|
| 227 |
+
predict_toxicity,
|
| 228 |
+
inputs=[text_input, lang_dropdown],
|
| 229 |
+
outputs=[result_html, plot_output]
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
# Also analyze on pressing Enter in the text box
|
| 233 |
+
text_input.submit(
|
| 234 |
+
predict_toxicity,
|
| 235 |
+
inputs=[text_input, lang_dropdown],
|
| 236 |
+
outputs=[result_html, plot_output]
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
gr.Markdown("""
|
| 240 |
+
### About this model
|
| 241 |
+
This model classifies text into six toxicity categories:
|
| 242 |
+
- **Toxic**: General toxicity
|
| 243 |
+
- **Severe Toxic**: Extreme toxicity
|
| 244 |
+
- **Obscene**: Obscene content
|
| 245 |
+
- **Threat**: Threatening content
|
| 246 |
+
- **Insult**: Insulting content
|
| 247 |
+
- **Identity Hate**: Identity-based hate
|
| 248 |
+
|
| 249 |
+
Built using XLM-RoBERTa with language-aware fine-tuning.
|
| 250 |
+
""")
|
| 251 |
+
|
| 252 |
+
return app
|
| 253 |
+
|
| 254 |
+
# Launch the app when script is run directly
|
| 255 |
+
if __name__ == "__main__":
|
| 256 |
+
# Create and launch the app
|
| 257 |
+
app = create_app()
|
| 258 |
+
app.launch(
|
| 259 |
+
server_name="0.0.0.0", # Bind to all interfaces
|
| 260 |
+
server_port=7860, # Default Gradio port
|
| 261 |
+
share=True # Generate public link
|
| 262 |
+
)
|
augmentation/balance_english.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

# Thread/env settings must be exported BEFORE torch (and the BLAS/OpenMP
# runtimes it links against) is imported: those libraries read the variables
# once at import time, so setting them afterwards has no effect.
# (Fix: the original imported torch first, making these exports no-ops.)
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'
os.environ['TF_CPU_ENABLE_AVX2'] = '1'
os.environ['TF_CPU_ENABLE_AVX512F'] = '1'
os.environ['TF_CPU_ENABLE_AVX512_VNNI'] = '1'
os.environ['TF_CPU_ENABLE_FMA'] = '1'
os.environ['MKL_NUM_THREADS'] = '80'
os.environ['OMP_NUM_THREADS'] = '80'

import torch

# Set PyTorch thread configurations once.
# set_num_interop_threads must run before any inter-op parallel work starts,
# so it is called immediately after import.
torch.set_num_threads(80)
torch.set_num_interop_threads(10)
| 16 |
+
|
| 17 |
+
# Now import everything else
import pandas as pd
import numpy as np
from pathlib import Path
import logging
from datetime import datetime
import sys
from toxic_augment import ToxicAugmenter
import json

# Configure logging
# Logs are duplicated to stdout and to a timestamped file under logs/;
# `timestamp` is also reused below when naming the stats JSON output.
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
log_file = log_dir / f"balance_english_{timestamp}.log"

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
| 44 |
+
|
| 45 |
+
def analyze_label_distribution(df, lang='en'):
    """Log and return per-label positive counts/percentages for one language.

    Returns a dict mapping each toxicity label to
    {'count': int, 'percentage': float}; empty dict when the language has
    no rows.
    """
    label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    subset = df[df['lang'] == lang]
    n_rows = len(subset)

    # Nothing to report for an absent language.
    if n_rows == 0:
        logger.warning(f"No samples found for language {lang.upper()}.")
        return {}

    logger.info(f"\nLabel Distribution for {lang.upper()}:")
    logger.info("-" * 50)
    distribution = {}
    for col in label_cols:
        positives = subset[col].sum()
        pct = (positives / n_rows) * 100
        distribution[col] = {'count': int(positives), 'percentage': pct}
        logger.info(f"{col}: {positives:,} ({pct:.2f}%)")
    return distribution
|
| 64 |
+
|
| 65 |
+
def analyze_language_distribution(df):
    """Log the per-language comment counts and return the counts Series."""
    counts = df['lang'].value_counts()
    total = len(df)
    logger.info("\nCurrent Language Distribution:")
    logger.info("-" * 50)
    for language, n in counts.items():
        logger.info(f"{language}: {n:,} comments ({n/total*100:.2f}%)")
    return counts
|
| 73 |
+
|
| 74 |
+
def calculate_required_samples(df):
    """Calculate how many English samples we need to generate.

    The target is the size of the largest language subset; the return value
    is that target minus the current English count (may be <= 0 when English
    is already the largest group).
    """
    counts_by_lang = df['lang'].value_counts()
    target = counts_by_lang.max()
    english = counts_by_lang.get('en', 0)
    deficit = target - english

    logger.info(f"\nTarget count per language: {target:,}")
    logger.info(f"Current English count: {english:,}")
    logger.info(f"Required additional English samples: {deficit:,}")

    return deficit
|
| 86 |
+
|
| 87 |
+
def generate_balanced_samples(df, required_samples):
    """Generate samples maintaining original class distribution ratios.

    Uses ToxicAugmenter to synthesize new English comments label by label,
    sizing each label's quota so the generated pool mirrors the per-label
    ratios observed in the existing English subset.

    Args:
        df: full multilingual dataset with a 'lang' column and the six
            binary toxicity label columns.
        required_samples: total number of new English rows to produce.

    Returns:
        DataFrame of generated rows (lang='en'), trimmed to at most
        `required_samples` rows.

    Raises:
        RuntimeError: if no valid samples could be generated at all.
    """
    logger.info("\nGenerating balanced samples...")

    # Get English samples
    en_df = df[df['lang'] == 'en']
    labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    # Calculate target counts for each label, proportional to the label's
    # prevalence among the existing English rows.
    target_counts = {}
    for label in labels:
        count = en_df[label].sum()
        ratio = count / len(en_df)
        target_count = int(ratio * required_samples)
        target_counts[label] = target_count
        logger.info(f"Target count for {label}: {target_count:,}")

    augmented_samples = []
    augmenter = ToxicAugmenter()
    total_generated = 0

    # Generate samples for each label
    for label, target_count in target_counts.items():
        if target_count == 0:
            continue

        logger.info(f"\nGenerating {target_count:,} samples for {label}")

        # Get seed texts with this label
        seed_texts = en_df[en_df[label] == 1]['comment_text'].tolist()

        if not seed_texts:
            logger.warning(f"No seed texts found for {label}, skipping...")
            continue

        # Generate samples with 5-minute timeout
        new_samples = augmenter.augment_dataset(
            target_samples=target_count,
            label=label,  # Using single label instead of label_combo
            seed_texts=seed_texts,
            timeout_minutes=5
        )

        if new_samples is not None and not new_samples.empty:
            augmented_samples.append(new_samples)
            total_generated += len(new_samples)

            # Log progress
            logger.info(f"✓ Generated {len(new_samples):,} samples")
            logger.info(f"Progress: {total_generated:,}/{required_samples:,}")

        # Stop early once the global quota is met, even if later labels
        # still have unfilled per-label quotas.
        if total_generated >= required_samples:
            logger.info("Reached required sample count, stopping generation")
            break

    # Combine all generated samples
    if augmented_samples:
        augmented_df = pd.concat(augmented_samples, ignore_index=True)
        augmented_df['lang'] = 'en'

        # Ensure we don't exceed the required sample count
        if len(augmented_df) > required_samples:
            logger.info(f"Trimming excess samples from {len(augmented_df):,} to {required_samples:,}")
            augmented_df = augmented_df.head(required_samples)

        # Log final class distribution
        logger.info("\nFinal class distribution in generated samples:")
        for label in labels:
            count = augmented_df[label].sum()
            percentage = (count / len(augmented_df)) * 100
            logger.info(f"{label}: {count:,} ({percentage:.2f}%)")

        # Also log clean samples (rows where every label column is 0)
        clean_count = len(augmented_df[augmented_df[labels].sum(axis=1) == 0])
        clean_percentage = (clean_count / len(augmented_df)) * 100
        logger.info(f"Clean samples: {clean_count:,} ({clean_percentage:.2f}%)")

        return augmented_df
    else:
        # Fix: raise a specific exception type instead of bare Exception;
        # still caught by callers that catch Exception.
        raise RuntimeError("Failed to generate any valid samples")
|
| 168 |
+
|
| 169 |
+
def balance_english_data():
    """Main function to balance English data with other languages.

    Pipeline: load the multilingual dataset, measure the language/label
    distributions, synthesize enough English rows to match the largest
    language subset, save the merged dataset, and dump before/after
    statistics to a JSON file under logs/.

    Raises:
        Re-raises any exception after logging it.
    """
    try:
        # Load dataset
        input_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv'
        logger.info(f"Loading dataset from {input_file}")
        df = pd.read_csv(input_file)

        # Analyze current distribution
        logger.info("\nAnalyzing current distribution...")
        initial_dist = analyze_language_distribution(df)
        initial_label_dist = analyze_label_distribution(df, 'en')

        # Calculate required samples
        required_samples = calculate_required_samples(df)

        if required_samples <= 0:
            logger.info("English data is already balanced. No augmentation needed.")
            return

        # Generate balanced samples
        augmented_df = generate_balanced_samples(df, required_samples)

        # Merge with original dataset
        logger.info("\nMerging datasets...")
        output_file = 'dataset/processed/MULTILINGUAL_TOXIC_DATASET_BALANCED.csv'

        # Combine datasets
        combined_df = pd.concat([df, augmented_df], ignore_index=True)

        # Save balanced dataset
        combined_df.to_csv(output_file, index=False)
        logger.info(f"\nSaved balanced dataset to {output_file}")

        # Final distribution check
        logger.info("\nFinal distribution after balancing:")
        final_dist = analyze_language_distribution(combined_df)
        final_label_dist = analyze_label_distribution(combined_df, 'en')

        # Save distribution statistics.
        # Fix: value_counts() yields numpy.int64 values, which json.dump
        # rejects with TypeError — coerce them to plain int first.
        stats = {
            'timestamp': timestamp,
            'initial_distribution': {
                'languages': {k: int(v) for k, v in initial_dist.items()},
                'english_labels': initial_label_dist
            },
            'final_distribution': {
                'languages': {k: int(v) for k, v in final_dist.items()},
                'english_labels': final_label_dist
            },
            'samples_generated': len(augmented_df),
            'total_samples': len(combined_df)
        }

        stats_file = f'logs/balance_stats_{timestamp}.json'
        with open(stats_file, 'w') as f:
            json.dump(stats, f, indent=2)
        logger.info(f"\nSaved balancing statistics to {stats_file}")

    except Exception as e:
        logger.error(f"Error during balancing: {str(e)}")
        raise
|
| 231 |
+
|
| 232 |
+
def main():
    """Entry point: run the English-balancing pipeline."""
    balance_english_data()
|
| 234 |
+
|
| 235 |
+
if __name__ == "__main__":
    # Script entry point: announce the run, then kick off the pipeline.
    logger.info("Starting English data balancing process...")
    main()
|
augmentation/threat_augment.py
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import (
|
| 3 |
+
AutoModelForCausalLM,
|
| 4 |
+
AutoTokenizer,
|
| 5 |
+
BitsAndBytesConfig
|
| 6 |
+
)
|
| 7 |
+
from langdetect import detect
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import logging
|
| 13 |
+
import gc
|
| 14 |
+
from typing import List
|
| 15 |
+
import json
|
| 16 |
+
from datetime import datetime, timedelta
|
| 17 |
+
import time
|
| 18 |
+
import sys
|
| 19 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 20 |
+
from sklearn.linear_model import LogisticRegression
|
| 21 |
+
import joblib
|
| 22 |
+
|
| 23 |
+
# Create log directories
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

# Get timestamp for log file
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"generation_{timestamp}.log"

# Configure logging once at the start.
# Output goes to both stdout and the timestamped file; the same `log_file`
# path is also reused by ThreatAugmenter for its JSONL generation buffer.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
logger.info(f"Starting new run. Log file: {log_file}")
|
| 43 |
+
|
| 44 |
+
def log_separator(message: str = ""):
    """Log a visual divider line, optionally labelled with *message*."""
    divider = "\n" + "="*40 + f" {message} " + "="*40 if message else "\n" + "="*100
    logger.info(divider)
|
| 50 |
+
|
| 51 |
+
class FastThreatValidator:
    """Fast threat validation using logistic regression.

    A lightweight TF-IDF + LogisticRegression classifier used to cheaply
    screen generated texts for threat content without a full LLM pass.
    The fitted vectorizer/model pair is cached on disk via joblib.
    """

    def __init__(self, model_path: str = "weights/threat_validator.joblib"):
        """Load the cached validator if present, else train and cache one."""
        self.model_path = model_path
        if Path(model_path).exists():
            logger.info("Loading fast threat validator...")
            model_data = joblib.load(model_path)
            self.vectorizer = model_data['vectorizer']
            self.model = model_data['model']
            logger.info("✓ Fast validator loaded")
        else:
            logger.info("Training fast threat validator...")
            self._train_validator()
            logger.info("✓ Fast validator trained and saved")

    def _train_validator(self):
        """Train a simple logistic regression model for threat detection."""
        # Load training data
        train_df = pd.read_csv("dataset/split/train.csv")

        # Prepare data
        X = train_df['comment_text'].fillna('')
        y = train_df['threat']

        # Create and fit vectorizer
        self.vectorizer = TfidfVectorizer(
            max_features=10000,
            ngram_range=(1, 2),
            strip_accents='unicode',
            min_df=2
        )
        X_vec = self.vectorizer.fit_transform(X)

        # Train model; balanced class weights because threat labels are rare
        self.model = LogisticRegression(
            C=1.0,
            class_weight='balanced',
            max_iter=200,
            n_jobs=-1
        )
        self.model.fit(X_vec, y)

        # Save model.
        # Fix: ensure the parent directory exists first — joblib.dump raises
        # FileNotFoundError if e.g. "weights/" has not been created yet.
        Path(self.model_path).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump({
            'vectorizer': self.vectorizer,
            'model': self.model
        }, self.model_path)

    def validate(self, texts: List[str], threshold: float = 0.6) -> np.ndarray:
        """Return a boolean mask marking texts whose predicted threat
        probability is >= `threshold`.

        Note: the return value is a numpy bool array (annotation fixed from
        the misleading List[bool]); callers such as
        ThreatAugmenter.validate_toxicity feed it straight into torch.tensor,
        which accepts it.
        """
        # Vectorize texts
        X = self.vectorizer.transform(texts)

        # Get probabilities for the positive (threat) class
        probs = self.model.predict_proba(X)[:, 1]

        # Return boolean mask
        return probs >= threshold
|
| 109 |
+
|
| 110 |
+
class ThreatAugmenter:
    """Generates synthetic English threat samples with Mistral-7B.

    Seeds the LLM with existing English threat comments from the
    multilingual dataset and writes the surviving generations to a
    timestamped CSV under dataset/augmented/.
    """

    def __init__(self, seed_samples_path: str = "dataset/processed/MULTILINGUAL_TOXIC_DATASET_360K_7LANG_FINAL.csv"):
        """Load the 4-bit-quantized LLM, the fast validator, and seed data.

        NOTE(review): max_memory below hard-codes two GPUs ({0,1}) while
        num_gpus is detected dynamically — confirm this runs on the intended
        dual-GPU host.
        """
        log_separator("INITIALIZATION")

        # Use global log file (shared with the logging config at module top)
        self.log_file = log_file

        # Initialize generation buffer (JSONL entries appended to log_file)
        self.generation_buffer = []
        self.buffer_size = 100  # Flush buffer every 100 entries

        # Multi-GPU setup
        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 0:
            # TF32 speeds up fp32 matmuls on Ampere+ at slightly lower precision
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            logger.info(f"Found {self.num_gpus} GPUs:")
            for i in range(self.num_gpus):
                mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f}GB)")

        # Load models
        log_separator("LOADING MODELS")
        logger.info("Loading Mistral-7B...")

        # Configure model for multi-GPU: 4-bit NF4 quantization with fp16 compute
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        self.llm = AutoModelForCausalLM.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            device_map="balanced",  # Ensures proper dual GPU usage
            torch_dtype=torch.float16,
            quantization_config=quantization_config,
            max_memory={0: "22GB", 1: "22GB"}  # Explicitly set memory limits for each GPU
        )

        # Left padding so generation continues from the prompt end in batches
        self.llm_tokenizer = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            padding_side="left",
            use_fast=True
        )
        self.llm_tokenizer.pad_token = self.llm_tokenizer.eos_token
        logger.info("✓ Mistral-7B loaded")

        # Initialize fast validator
        # NOTE(review): validate_toxicity/validate_language below are never
        # called from augment_dataset — confirm whether validation was meant
        # to gate the generated samples.
        self.validator = FastThreatValidator()
        logger.info("✓ Fast validator initialized")

        # Load and preprocess data
        log_separator("LOADING DATA")
        logger.info(f"Loading samples from {seed_samples_path}")
        self.seed_df = pd.read_csv(seed_samples_path)
        # Seed pool: English rows flagged as threats
        self.en_threat_samples = self.seed_df[
            (self.seed_df['lang'] == 'en') &
            (self.seed_df['threat'] == 1)
        ]
        logger.info(f"Found {len(self.en_threat_samples)} EN threat samples")

        # Optimize batch processing
        self.max_batch_size = 48  # Increased batch size
        self.prefetch_factor = 4
        self.num_workers = 8

        # Memory cleanup
        torch.cuda.empty_cache()
        gc.collect()
        log_separator()

    def generate_prompt(self, seed_text: str) -> str:
        """Generate a prompt for realistic threatening content"""
        return f"""<s>[INST] Generate a threatening social media comment that is realistic and intimidating.

Reference example (generate something different): {seed_text}

Requirements:
- Make it genuinely threatening
- Use authentic social media language
- Keep it under 50 words
- Must be different from example

Generate ONLY the comment: [/INST]"""

    def flush_buffer(self):
        """Flush the generation buffer to disk"""
        # Appends one JSON object per line (JSONL) to the shared log file.
        if self.generation_buffer:
            try:
                with open(self.log_file, 'a', encoding='utf-8') as f:
                    for entry in self.generation_buffer:
                        f.write(json.dumps(entry, ensure_ascii=False) + '\n')
                self.generation_buffer = []
            except Exception as e:
                logger.error(f"Failed to flush buffer: {str(e)}")

    def log_generation(self, seed_text: str, prompt: str, generated_text: str, is_valid: bool):
        """Buffer log generation details"""
        log_entry = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "seed_text": seed_text,
            "prompt": prompt,
            "generated_text": generated_text,
            "is_valid": is_valid
        }

        self.generation_buffer.append(log_entry)

        # Flush buffer if it reaches the size limit
        if len(self.generation_buffer) >= self.buffer_size:
            self.flush_buffer()

    def generate_samples(self, prompts: List[str], seed_texts: List[str]) -> List[str]:
        """Run one LLM generation batch and return the cleaned responses.

        Returns [] on any generation error. Responses are kept only when
        they contain both "[/INST]" and "</s>" markers (note:
        skip_special_tokens=False is required for "</s>" to survive
        decoding), are 3-50 words long, and contain no prompt-leak phrases.
        NOTE(review): `seed_texts` is currently unused here — confirm.
        """
        try:
            with torch.amp.autocast('cuda', dtype=torch.float16):
                inputs = self.llm_tokenizer(prompts, return_tensors="pt", padding=True,
                                            truncation=True, max_length=256).to(self.llm.device)

                outputs = self.llm.generate(
                    **inputs,
                    max_new_tokens=32,
                    temperature=0.95,
                    do_sample=True,
                    top_p=0.92,
                    top_k=50,
                    num_return_sequences=1,
                    repetition_penalty=1.15,
                    pad_token_id=self.llm_tokenizer.pad_token_id,
                    eos_token_id=self.llm_tokenizer.eos_token_id
                )

                texts = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=False)
                cleaned_texts = []
                valid_count = 0

                # Process responses with minimal logging
                for idx, text in enumerate(texts):
                    if "[/INST]" in text and "</s>" in text:
                        # Keep only the model's answer between the prompt tag
                        # and the end-of-sequence token.
                        response = text.split("[/INST]")[1].split("</s>")[0].strip()
                        response = response.strip().strip('"').strip("'")

                        word_count = len(response.split())
                        # Drop too-short/too-long outputs and prompt echoes.
                        if (word_count >= 3 and word_count <= 50 and
                            not any(x in response.lower() for x in [
                                "generate", "requirements:", "reference",
                                "[inst]", "example"
                            ])):
                            cleaned_texts.append(response)
                            valid_count += 1

                # Log only summary statistics
                if valid_count > 0:
                    logger.info(f"\nBatch Success: {valid_count}/{len(texts)} ({valid_count/len(texts)*100:.1f}%)")

                return cleaned_texts

        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return []

    def validate_toxicity(self, texts: List[str]) -> torch.Tensor:
        """Validate texts using fast logistic regression"""
        if not texts:
            return torch.zeros(0, dtype=torch.bool)

        # Get validation mask from fast validator
        validation_mask = self.validator.validate(texts)

        # Convert to torch tensor on the LLM's device
        return torch.tensor(validation_mask, dtype=torch.bool, device=self.llm.device)

    def validate_language(self, texts: List[str]) -> List[bool]:
        """Simple language validation"""
        # NOTE(review): langdetect raises on empty/whitespace input — callers
        # should only pass non-empty texts.
        return [detect(text) == 'en' for text in texts]

    def augment_dataset(self, target_samples: int = 500, batch_size: int = 32):
        """Main augmentation loop with progress bar and CSV saving.

        Loops until `target_samples` cleaned generations have accumulated,
        then writes them (trimmed to target) to
        dataset/augmented/threat_augmented_<timestamp>.csv and returns the
        DataFrame.
        NOTE(review): if the model never yields valid samples this loop
        never terminates — consider a max-attempt cap.
        """
        try:
            start_time = time.time()
            logger.info(f"Starting generation: target={target_samples}, batch_size={batch_size}")
            generated_samples = []
            stats = {
                "total_attempts": 0,
                "valid_samples": 0,
                "batch_times": []
            }

            # Create output directory if it doesn't exist
            output_dir = Path("dataset/augmented")
            output_dir.mkdir(parents=True, exist_ok=True)

            # Generate timestamp for the filename (shadows the module-level
            # `timestamp` inside this method)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = output_dir / f"threat_augmented_{timestamp}.csv"

            # Initialize progress bar
            pbar = tqdm(total=target_samples,
                        desc="Generating samples",
                        unit="samples",
                        ncols=100,
                        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]')

            while len(generated_samples) < target_samples:
                batch_start = time.time()

                # Fresh random seed texts each batch
                seed_texts = self.en_threat_samples['comment_text'].sample(batch_size).tolist()
                prompts = [self.generate_prompt(text) for text in seed_texts]
                new_samples = self.generate_samples(prompts, seed_texts)

                if not new_samples:
                    continue

                # Update statistics
                batch_time = time.time() - batch_start
                stats["batch_times"].append(batch_time)
                stats["total_attempts"] += len(new_samples)
                prev_len = len(generated_samples)
                generated_samples.extend(new_samples)
                stats["valid_samples"] = len(generated_samples)

                # Update progress bar
                pbar.update(len(generated_samples) - prev_len)

                # Calculate and display success rate periodically
                if len(stats["batch_times"]) % 10 == 0:  # Every 10 batches
                    success_rate = (stats["valid_samples"] / stats["total_attempts"]) * 100
                    avg_batch_time = sum(stats["batch_times"][-20:]) / min(len(stats["batch_times"]), 20)
                    pbar.set_postfix({
                        'Success Rate': f'{success_rate:.1f}%',
                        'Batch Time': f'{avg_batch_time:.2f}s'
                    })

                # Cleanup GPU/host memory every few batches
                if len(generated_samples) % (batch_size * 5) == 0:
                    torch.cuda.empty_cache()
                    gc.collect()

            # Close progress bar
            pbar.close()

            # Create DataFrame and save to CSV
            df = pd.DataFrame({
                'text': generated_samples[:target_samples],
                'label': 1,  # These are all threat samples
                'source': 'augmented',
                'timestamp': timestamp
            })

            # Save to CSV
            df.to_csv(output_file, index=False)
            logger.info(f"\nSaved {len(df)} samples to {output_file}")

            # Final stats
            total_time = str(timedelta(seconds=int(time.time() - start_time)))
            logger.info(f"Generation complete: {len(generated_samples)} samples generated in {total_time}")

            return df

        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise
|
| 373 |
+
|
| 374 |
+
if __name__ == "__main__":
    # Free as much GPU/host memory as possible before loading the 7B model.
    torch.cuda.empty_cache()
    gc.collect()

    augmenter = ThreatAugmenter()
    augmented_df = augmenter.augment_dataset(target_samples=500)
|
augmentation/toxic_augment.py
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import (
|
| 3 |
+
AutoModelForCausalLM,
|
| 4 |
+
AutoTokenizer,
|
| 5 |
+
BitsAndBytesConfig
|
| 6 |
+
)
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
from tqdm import tqdm
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import logging
|
| 12 |
+
import gc
|
| 13 |
+
from typing import List, Dict
|
| 14 |
+
import json
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
import time
|
| 17 |
+
import sys
|
| 18 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 19 |
+
from sklearn.linear_model import LogisticRegression
|
| 20 |
+
import joblib
|
| 21 |
+
import random
|
| 22 |
+
|
| 23 |
+
# Create log directories
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

# Get timestamp for log file. NOTE: this module-level timestamp is shared by
# the whole run (log file name and any data tagged with it).
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"generation_{timestamp}.log"

# Configure logging: mirror every record to stdout and to the per-run file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_file)
    ]
)

logger = logging.getLogger(__name__)
logger.info(f"Starting new run. Log file: {log_file}")
|
| 43 |
+
|
| 44 |
+
class FastToxicValidator:
    """Fast toxicity validation using logistic regression.

    Keeps one TF-IDF vectorizer + ``LogisticRegression`` pair per toxicity
    label. A previously trained bundle is loaded from ``model_path`` when it
    exists; otherwise the models are trained from ``dataset/split/train.csv``
    and the bundle is saved for reuse.
    """

    # Toxicity labels a model is trained for, in training order.
    LABELS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    def __init__(self, model_path: str = "weights/toxic_validator.joblib"):
        """Load the validator bundle from disk, training it first if absent.

        Args:
            model_path: Path of the joblib bundle holding ``vectorizers``
                and ``models`` dicts keyed by label.
        """
        self.model_path = model_path
        if Path(model_path).exists():
            logger.info("Loading fast toxic validator...")
            model_data = joblib.load(model_path)
            self.vectorizers = model_data['vectorizers']
            self.models = model_data['models']
            logger.info("✓ Fast validator loaded")
        else:
            logger.info("Training fast toxic validator...")
            self._train_validator()
            logger.info("✓ Fast validator trained and saved")

    def _train_validator(self):
        """Train one TF-IDF + logistic-regression model per label and save the bundle."""
        # Load training data (expects 'comment_text' plus one binary column per label)
        train_df = pd.read_csv("dataset/split/train.csv")

        self.vectorizers = {}
        self.models = {}

        # Hoisted out of the loop: the cleaned text corpus is label-independent.
        texts = train_df['comment_text'].fillna('')

        # Train a model for each label
        for label in self.LABELS:
            # Create and fit vectorizer
            vectorizer = TfidfVectorizer(
                max_features=10000,
                ngram_range=(1, 2),
                strip_accents='unicode',
                min_df=2
            )
            X = vectorizer.fit_transform(texts)
            y = train_df[label]

            # Balanced class weights: positive labels are heavily under-represented
            model = LogisticRegression(
                C=1.0,
                class_weight='balanced',
                max_iter=200,
                n_jobs=-1
            )
            model.fit(X, y)

            self.vectorizers[label] = vectorizer
            self.models[label] = model

        # Fix: ensure the output directory exists before saving; joblib.dump
        # raises if e.g. "weights/" has not been created yet.
        Path(self.model_path).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump({
            'vectorizers': self.vectorizers,
            'models': self.models
        }, self.model_path)

    def get_probabilities(self, texts: List[str], label: str) -> np.ndarray:
        """Return P(label == 1) for each text as a 1-D float array."""
        X = self.vectorizers[label].transform(texts)
        return self.models[label].predict_proba(X)[:, 1]

    def validate(self, texts: List[str], label: str, threshold: float = 0.5) -> List[bool]:
        """Return, per text, whether P(label) meets ``threshold`` (default 0.5).

        Fix: return a plain list of Python bools to match the annotation;
        the previous numpy-array return had ambiguous truthiness for callers
        doing ``if result:``. Element-wise use is unchanged.
        """
        probs = self.get_probabilities(texts, label)
        return [bool(p >= threshold) for p in probs]
|
| 115 |
+
|
| 116 |
+
class ToxicAugmenter:
    """Generates synthetic toxic comments with Mistral-7B-Instruct and accepts
    only those that the FastToxicValidator scores above a per-label threshold.
    """

    def __init__(self):
        """Load the 4-bit quantized Mistral-7B generator plus the fast validator.

        Side effects: allocates GPU memory, enables TF32 fast paths when CUDA
        devices are present, and may trigger validator training on first run.
        """
        logger.info("Initializing ToxicAugmenter...")

        # Initialize generation buffer: log entries queued for batched writes
        self.generation_buffer = []
        self.buffer_size = 100

        # Multi-GPU setup: enable TF32 matmul and report detected devices
        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 0:
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            logger.info(f"Found {self.num_gpus} GPUs:")
            for i in range(self.num_gpus):
                mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f}GB)")

        # Load models with optimized settings
        logger.info("Loading Mistral-7B...")

        # 4-bit NF4 quantization with fp16 compute keeps the 7B model inside
        # the per-GPU memory budget declared below.
        quantization_config = BitsAndBytesConfig(
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        # Initialize tokenizer first: its pad token id is fed to the model below
        self.llm_tokenizer = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            padding_side="left",
            use_fast=True,
            model_max_length=512,  # Limit context size for faster processing
            pad_token="</s>",  # Set pad token explicitly
            add_eos_token=True,
            add_bos_token=True
        )

        # Set pad token id explicitly (EOS doubles as pad)
        self.llm_tokenizer.pad_token = self.llm_tokenizer.eos_token
        self.llm_tokenizer.pad_token_id = self.llm_tokenizer.eos_token_id

        # Now initialize model with tokenizer's pad token.
        # NOTE(review): max_memory hard-codes exactly two GPUs — confirm on
        # machines with a different device count.
        self.llm = AutoModelForCausalLM.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            device_map="balanced",
            torch_dtype=torch.float16,
            quantization_config=quantization_config,
            max_memory={0: "22GB", 1: "22GB"},
            use_cache=True,  # Enable KV cache for faster generation
            pad_token_id=self.llm_tokenizer.pad_token_id
        )

        logger.info("✓ Mistral-7B loaded")

        # Initialize validator (trains + saves it on first run)
        self.validator = FastToxicValidator()
        logger.info("✓ Fast validator initialized")

    def generate_prompt(self, seed_text: str, label: str) -> str:
        """Build an [INST]-wrapped prompt asking for one comment of the given
        toxicity type, using *seed_text* only as a style reference."""
        base_prompt = """<s>[INST] You are tasked with generating a highly {type} comment. Your goal is to create content that would definitely be flagged as {type}.

Example for reference (generate something different): "{seed_text}"

Requirements:
- Must be extremely {type} in nature
- Use very strong language
- Must be different from the example

Generate ONLY the {type} comment (no explanations): [/INST]"""

        # Map short label names to the adjective phrases used in the prompt
        type_mapping = {
            'toxic': 'toxic and offensive',
            'severe_toxic': 'toxic and hostile',
            'obscene': 'obscene and vulgar',
            'threat': 'threatening and intimidating',
            'insult': 'insulting and demeaning',
            'identity_hate': 'discriminatory and hateful'
        }

        return base_prompt.format(type=type_mapping[label], seed_text=seed_text)

    def flush_buffer(self):
        """Append all buffered generation-log entries to the run's log file
        (one JSON object per line), then clear the buffer. Errors are logged,
        never raised."""
        if self.generation_buffer:
            try:
                with open(log_file, 'a', encoding='utf-8') as f:
                    for entry in self.generation_buffer:
                        f.write(json.dumps(entry, ensure_ascii=False) + '\n')
                self.generation_buffer = []
            except Exception as e:
                logger.error(f"Failed to flush buffer: {str(e)}")

    def log_generation(self, seed_text: str, prompt: str, generated_text: str, validation_results: Dict[str, bool]):
        """Queue one generation record for logging; flushes when the buffer
        reaches ``self.buffer_size`` entries."""
        # Convert numpy/torch boolean values to Python booleans so the entry
        # is JSON-serializable
        serializable_results = {
            k: bool(v) for k, v in validation_results.items()
        }

        log_entry = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "seed_text": seed_text,
            "prompt": prompt,
            "generated_text": generated_text,
            "validation_results": serializable_results
        }

        self.generation_buffer.append(log_entry)

        # Flush buffer if it reaches the size limit
        if len(self.generation_buffer) >= self.buffer_size:
            self.flush_buffer()

    def validate_sample(self, text: str, label: str, attempts: int) -> bool:
        """Return True when the validator's probability for *label* clears the
        (deliberately lenient) per-label threshold. ``attempts`` is only used
        to throttle logging. Any validator error counts as a rejection."""
        try:
            # Get probability for the label
            prob = self.validator.get_probabilities([text], label)[0]

            # Super lenient thresholds to get samples
            thresholds = {
                'toxic': 0.3,  # Very lenient to get started
                'severe_toxic': 0.2,
                'obscene': 0.2,
                'threat': 0.2,
                'insult': 0.2,
                'identity_hate': 0.2
            }
            threshold = thresholds.get(label, 0.2)

            # Log validation result (always on pass, every 5th attempt otherwise)
            passed = prob >= threshold
            if passed or (attempts % 5 == 0):  # More frequent logging
                logger.info(f"\nValidation - Label: {label}, Text: {text}")
                logger.info(f"Probability: {prob:.3f}, Threshold: {threshold:.2f}, Passed: {passed}")

            return passed

        except Exception as e:
            logger.error(f"Validation error: {str(e)}")
            return False

    def generate_samples(self, target_samples: int, label: str,
                        seed_texts: List[str], total_timeout: int = 300) -> pd.DataFrame:
        """Generate up to *target_samples* validated comments for one label.

        Loops one prompt at a time until the target, the attempt cap
        (50 x target), or *total_timeout* seconds is reached. Returns a
        DataFrame with 'comment_text' plus one-hot label columns, or None
        when nothing was accepted.
        """
        start_time = time.time()
        generated_samples = []
        attempts = 0
        max_attempts = target_samples * 50  # Much more attempts allowed
        # NOTE(review): batch_size is computed but never used in this method —
        # generation below is strictly one prompt per attempt.
        batch_size = min(16, target_samples)  # Smaller batch size for better control

        pbar = tqdm(total=target_samples, desc=f"Generating {label} samples")

        try:
            while len(generated_samples) < target_samples and attempts < max_attempts:
                # Check timeout
                if time.time() - start_time > total_timeout:
                    logger.warning(f"Generation timed out after {total_timeout} seconds")
                    break

                attempts += 1

                # Select random seed text and generate prompt
                seed_text = random.choice(seed_texts)
                prompt = self.generate_prompt(seed_text, label)

                try:
                    # Generate text with optimized parameters
                    inputs = self.llm_tokenizer(prompt, return_tensors="pt", padding=True,
                                              truncation=True, max_length=512).to(self.llm.device)

                    # Beam search combined with high-temperature sampling:
                    # deliberately noisy settings to maximize output diversity.
                    with torch.no_grad():
                        outputs = self.llm.generate(
                            **inputs,
                            max_new_tokens=200,  # Doubled for longer content
                            num_beams=4,  # Added beam search
                            temperature=1.35,  # Higher temperature for more randomness
                            do_sample=True,
                            top_p=0.99,  # Almost no filtering
                            top_k=200,  # More options
                            num_return_sequences=1,
                            repetition_penalty=1.0,  # No repetition penalty
                            no_repeat_ngram_size=0,  # No ngram blocking
                            early_stopping=True,  # Stop when complete
                            pad_token_id=self.llm_tokenizer.pad_token_id,
                            bos_token_id=self.llm_tokenizer.bos_token_id,
                            eos_token_id=self.llm_tokenizer.eos_token_id,
                            use_cache=True
                        )

                    text = self.llm_tokenizer.decode(outputs[0], skip_special_tokens=True)

                    # Extract the generated text after [/INST]
                    if "[/INST]" in text:
                        output = text.split("[/INST]")[1].strip()
                        output = output.strip().strip('"').strip("'")

                        # Only check minimum length
                        if len(output) >= 10:
                            # Log generation attempt
                            if attempts % 5 == 0:  # More frequent logging
                                logger.info(f"\nAttempt {attempts}: Generated text: {output}")

                            # Validate sample; accepted rows get a one-hot
                            # label vector with only *label* set
                            if self.validate_sample(output, label, attempts):
                                sample_dict = {'comment_text': output}
                                for l in ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']:
                                    sample_dict[l] = 1 if l == label else 0
                                generated_samples.append(sample_dict)
                                pbar.update(1)
                                logger.info(f"✓ Valid {label} sample generated ({len(generated_samples)}/{target_samples})")

                except Exception as e:
                    logger.error(f"Generation error on attempt {attempts}: {str(e)}")
                    continue

                # Clear cache less frequently
                if attempts % 200 == 0:
                    torch.cuda.empty_cache()
                    gc.collect()

        finally:
            pbar.close()
            logger.info(f"Generation finished: {len(generated_samples)}/{target_samples} samples in {attempts} attempts")

        # Return results even if partial
        if generated_samples:
            return pd.DataFrame(generated_samples)
        return None

    def augment_dataset(self, target_samples: int, label: str, seed_texts: List[str], timeout_minutes: int = 5) -> pd.DataFrame:
        """Drive generate_samples() in batches until *target_samples* rows are
        collected or the (capped, max 5 min) timeout expires.

        Returns the combined DataFrame trimmed to *target_samples*, or None on
        failure/empty output. Always flushes the log buffer and empties the
        CUDA cache on exit.
        """
        logger.info(f"\nGenerating {target_samples} samples with label: {label}")

        generated_samples = []
        batch_size = min(32, target_samples)
        start_time = time.time()
        timeout_seconds = min(timeout_minutes * 60, 300)  # Hard limit of 5 minutes
        total_generated = 0
        pbar = None

        try:
            # Create progress bar
            pbar = tqdm(
                total=target_samples,
                desc="Generating",
                unit="samples",
                ncols=100,
                bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]'
            )

            while total_generated < target_samples:
                # Check timeout
                elapsed_time = time.time() - start_time
                if elapsed_time > timeout_seconds:
                    logger.warning(f"Time limit reached after {elapsed_time/60:.1f} minutes")
                    break

                # Calculate remaining samples needed
                remaining = target_samples - total_generated
                current_batch_size = min(batch_size, remaining)

                # Select batch of seed texts
                batch_seeds = np.random.choice(seed_texts, size=current_batch_size)
                # NOTE(review): prompts is built but never used — generate_samples
                # rebuilds prompts internally from batch_seeds.
                prompts = [self.generate_prompt(seed, label) for seed in batch_seeds]

                # Generate and validate samples; remaining wall-clock budget is
                # passed down so the inner loop honors the same deadline
                batch_start = time.time()
                new_samples = self.generate_samples(
                    target_samples=current_batch_size,
                    label=label,
                    seed_texts=batch_seeds,
                    total_timeout=timeout_seconds - elapsed_time
                )

                if new_samples is not None and not new_samples.empty:
                    if len(new_samples) > remaining:
                        new_samples = new_samples.head(remaining)

                    generated_samples.append(new_samples)
                    num_new = len(new_samples)
                    total_generated += num_new

                    # Update progress bar
                    pbar.update(num_new)

                    # Calculate and display metrics.
                    # NOTE(review): elapsed_time was captured at the top of this
                    # loop iteration, so the displayed rate is slightly stale.
                    elapsed_minutes = elapsed_time / 60
                    rate = total_generated / elapsed_minutes if elapsed_minutes > 0 else 0
                    batch_time = time.time() - batch_start
                    time_remaining = max(0, timeout_seconds - elapsed_time)

                    pbar.set_postfix({
                        'rate': f'{rate:.1f}/min',
                        'batch': f'{batch_time:.1f}s',
                        'remain': f'{time_remaining:.0f}s'
                    }, refresh=True)

                # Memory management every few batches
                if total_generated % (batch_size * 4) == 0:
                    torch.cuda.empty_cache()

            # Combine all generated samples
            if generated_samples:
                final_df = pd.concat(generated_samples, ignore_index=True)
                if len(final_df) > target_samples:
                    final_df = final_df.head(target_samples)
                logger.info(f"Successfully generated {len(final_df)} samples in {elapsed_time/60:.1f} minutes")
                return final_df

            return None

        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return None
        finally:
            if pbar is not None:
                pbar.close()
            # Final cleanup
            self.flush_buffer()
            torch.cuda.empty_cache()
|
datacard.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Jigsaw Toxic Comment Classification Dataset
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
Version: 1.0
|
| 5 |
+
Date Created: 2025-02-03
|
| 6 |
+
|
| 7 |
+
### Description
|
| 8 |
+
|
| 9 |
+
The Jigsaw Toxic Comment Classification Dataset is designed to help identify and classify toxic online comments.
|
| 10 |
+
It contains text comments with multiple toxicity-related labels including general toxicity, severe toxicity,
|
| 11 |
+
obscenity, threats, insults, and identity-based hate speech.
|
| 12 |
+
|
| 13 |
+
The dataset includes:
|
| 14 |
+
1. Main training data with binary toxicity labels
|
| 15 |
+
2. Unintended bias training data with additional identity attributes
|
| 16 |
+
3. Processed versions with sequence length 128 for direct model input
|
| 17 |
+
4. Test and validation sets for model evaluation
|
| 18 |
+
|
| 19 |
+
This dataset was created by Jigsaw and Google's Conversation AI team to help improve online conversation quality
|
| 20 |
+
by identifying and classifying various forms of toxic comments.
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
## Column Descriptions
|
| 24 |
+
|
| 25 |
+
- **id**: Unique identifier for each comment
|
| 26 |
+
- **comment_text**: The text content of the comment to be classified
|
| 27 |
+
- **toxic**: Binary label indicating if the comment is toxic
|
| 28 |
+
- **severe_toxic**: Binary label for extremely toxic comments
|
| 29 |
+
- **obscene**: Binary label for obscene content
|
| 30 |
+
- **threat**: Binary label for threatening content
|
| 31 |
+
- **insult**: Binary label for insulting content
|
| 32 |
+
- **identity_hate**: Binary label for identity-based hate speech
|
| 33 |
+
- **target**: Overall toxicity score (in bias dataset)
|
| 34 |
+
- **identity_attack**: Binary label for identity-based attacks
|
| 35 |
+
- **identity_***: Various identity-related attributes in the bias dataset
|
| 36 |
+
- **lang**: Language of the comment
|
| 37 |
+
|
| 38 |
+
## Files
|
| 39 |
+
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'  # NOTE: the top-level 'version' key is obsolete in the Compose Specification and is ignored by Compose V2
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
toxic-classifier:
|
| 5 |
+
build: .
|
| 6 |
+
runtime: nvidia # Enable NVIDIA runtime for GPU support
|
| 7 |
+
environment:
|
| 8 |
+
- NVIDIA_VISIBLE_DEVICES=all
|
| 9 |
+
- WANDB_API_KEY=${WANDB_API_KEY} # Set this in .env file
|
| 10 |
+
volumes:
|
| 11 |
+
- ./dataset:/app/dataset # Mount dataset directory
|
| 12 |
+
- ./weights:/app/weights # Mount weights directory
|
| 13 |
+
command: python model/train.py # Default command, can be overridden
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_identity_hate.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_insult.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_obscene.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_severe_toxic.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_threat.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_0.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_1.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_2.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_3.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_4.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_5.png
ADDED
|
evaluation_results/eval_20250208_161149/confusion_matrices/cm_toxic_6.png
ADDED
|
evaluation_results/eval_20250208_161149/eval_params.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20250208_161149",
|
| 3 |
+
"model_path": "weights/toxic_classifier_xlm-roberta-large",
|
| 4 |
+
"test_file": "dataset/split/test.csv",
|
| 5 |
+
"batch_size": 32,
|
| 6 |
+
"num_workers": null
|
| 7 |
+
}
|
evaluation_results/eval_20250208_161149/evaluation_results.json
ADDED
|
@@ -0,0 +1,2020 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overall": {
|
| 3 |
+
"loss": 0.18776385083473274,
|
| 4 |
+
"auc_macro": 0.9259171799699759,
|
| 5 |
+
"auc_weighted": 0.9442696333538418,
|
| 6 |
+
"precision_macro": 0.4388604553772207,
|
| 7 |
+
"precision_weighted": 0.7008073672218381,
|
| 8 |
+
"recall_macro": 0.8836014181101747,
|
| 9 |
+
"recall_weighted": 0.9051010634378761,
|
| 10 |
+
"f1_macro": 0.530782857064369,
|
| 11 |
+
"f1_weighted": 0.7669279374035199,
|
| 12 |
+
"class_support": {
|
| 13 |
+
"toxic": 17646,
|
| 14 |
+
"severe_toxic": 1649,
|
| 15 |
+
"obscene": 8625,
|
| 16 |
+
"threat": 714,
|
| 17 |
+
"insult": 10201,
|
| 18 |
+
"identity_hate": 1882
|
| 19 |
+
},
|
| 20 |
+
"per_class_metrics": {
|
| 21 |
+
"toxic": {
|
| 22 |
+
"precision": 0.9115322083309974,
|
| 23 |
+
"recall": 0.9213986172503683,
|
| 24 |
+
"f1": 0.9164388580446975,
|
| 25 |
+
"support": 17646,
|
| 26 |
+
"specificity": 0.9121478677207437
|
| 27 |
+
},
|
| 28 |
+
"severe_toxic": {
|
| 29 |
+
"precision": 0.15755900489049543,
|
| 30 |
+
"recall": 0.8987265009096422,
|
| 31 |
+
"f1": 0.26811397557666217,
|
| 32 |
+
"support": 1649,
|
| 33 |
+
"specificity": 0.7666597956359139
|
| 34 |
+
},
|
| 35 |
+
"obscene": {
|
| 36 |
+
"precision": 0.6238325281803543,
|
| 37 |
+
"recall": 0.8983188405797101,
|
| 38 |
+
"f1": 0.7363269185079592,
|
| 39 |
+
"support": 8625,
|
| 40 |
+
"specificity": 0.8268539450765297
|
| 41 |
+
},
|
| 42 |
+
"threat": {
|
| 43 |
+
"precision": 0.10505486598309048,
|
| 44 |
+
"recall": 0.8179271708683473,
|
| 45 |
+
"f1": 0.18619480312450185,
|
| 46 |
+
"support": 714,
|
| 47 |
+
"specificity": 0.8574253453315757
|
| 48 |
+
},
|
| 49 |
+
"insult": {
|
| 50 |
+
"precision": 0.6205890336590663,
|
| 51 |
+
"recall": 0.8964807371826291,
|
| 52 |
+
"f1": 0.7334482896900189,
|
| 53 |
+
"support": 10201,
|
| 54 |
+
"specificity": 0.7799425355217067
|
| 55 |
+
},
|
| 56 |
+
"identity_hate": {
|
| 57 |
+
"precision": 0.21459509121932013,
|
| 58 |
+
"recall": 0.8687566418703507,
|
| 59 |
+
"f1": 0.3441742974423745,
|
| 60 |
+
"support": 1882,
|
| 61 |
+
"specificity": 0.822570123939987
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
"class_weights": {
|
| 65 |
+
"toxic": 0.43338163420684234,
|
| 66 |
+
"severe_toxic": 0.04049905444900165,
|
| 67 |
+
"obscene": 0.21182798339759806,
|
| 68 |
+
"threat": 0.017535673060392463,
|
| 69 |
+
"insult": 0.2505341749146548,
|
| 70 |
+
"identity_hate": 0.04622147997151067
|
| 71 |
+
},
|
| 72 |
+
"hamming_loss": 0.1618924586235303,
|
| 73 |
+
"exact_match": 0.499747247809481,
|
| 74 |
+
"specificity_macro": 0.8275999355377427,
|
| 75 |
+
"specificity_weighted": 0.8275999355377428,
|
| 76 |
+
"summary": {
|
| 77 |
+
"auc": {
|
| 78 |
+
"macro": 0.9259171799699759,
|
| 79 |
+
"weighted": 0.9442696333538418
|
| 80 |
+
},
|
| 81 |
+
"f1": {
|
| 82 |
+
"macro": 0.530782857064369,
|
| 83 |
+
"weighted": 0.7669279374035199
|
| 84 |
+
},
|
| 85 |
+
"precision": {
|
| 86 |
+
"macro": 0.4388604553772207,
|
| 87 |
+
"weighted": 0.7008073672218381
|
| 88 |
+
},
|
| 89 |
+
"recall": {
|
| 90 |
+
"macro": 0.8836014181101747,
|
| 91 |
+
"weighted": 0.9051010634378761
|
| 92 |
+
},
|
| 93 |
+
"specificity": {
|
| 94 |
+
"macro": 0.8275999355377427,
|
| 95 |
+
"weighted": 0.8275999355377428
|
| 96 |
+
},
|
| 97 |
+
"other_metrics": {
|
| 98 |
+
"hamming_loss": 0.1618924586235303,
|
| 99 |
+
"exact_match": 0.499747247809481
|
| 100 |
+
},
|
| 101 |
+
"class_support": {
|
| 102 |
+
"toxic": 17646,
|
| 103 |
+
"severe_toxic": 1649,
|
| 104 |
+
"obscene": 8625,
|
| 105 |
+
"threat": 714,
|
| 106 |
+
"insult": 10201,
|
| 107 |
+
"identity_hate": 1882
|
| 108 |
+
}
|
| 109 |
+
}
|
| 110 |
+
},
|
| 111 |
+
"per_language": {
|
| 112 |
+
"0": {
|
| 113 |
+
"auc": 0.9546775894690953,
|
| 114 |
+
"precision": 0.714413481020392,
|
| 115 |
+
"recall": 0.9246670642019479,
|
| 116 |
+
"f1": 0.7877150106257862,
|
| 117 |
+
"hamming_loss": 0.12826939843068874,
|
| 118 |
+
"exact_match": 0.5564516129032258,
|
| 119 |
+
"specificity": 0.8596476657420098,
|
| 120 |
+
"class_metrics": {
|
| 121 |
+
"toxic": {
|
| 122 |
+
"auc": 0.9621138334064959,
|
| 123 |
+
"threshold": 0.46047261357307434,
|
| 124 |
+
"precision": 0.8825137733163603,
|
| 125 |
+
"recall": 0.9342830882352909,
|
| 126 |
+
"f1": 0.9076608519017388,
|
| 127 |
+
"specificity": 0.8756218905472631,
|
| 128 |
+
"npv": 0.9301878222768437,
|
| 129 |
+
"positive_samples": 2176,
|
| 130 |
+
"true_positives": 2143,
|
| 131 |
+
"false_positives": 285,
|
| 132 |
+
"true_negatives": 2008,
|
| 133 |
+
"false_negatives": 150,
|
| 134 |
+
"auc_ci": [
|
| 135 |
+
0.9621138334064959,
|
| 136 |
+
0.9621138334064959
|
| 137 |
+
],
|
| 138 |
+
"precision_ci": [
|
| 139 |
+
0.8825137733163603,
|
| 140 |
+
0.8825137733163603
|
| 141 |
+
],
|
| 142 |
+
"recall_ci": [
|
| 143 |
+
0.9342830882352909,
|
| 144 |
+
0.9342830882352909
|
| 145 |
+
],
|
| 146 |
+
"f1_ci": [
|
| 147 |
+
0.9076608519017388,
|
| 148 |
+
0.9076608519017388
|
| 149 |
+
],
|
| 150 |
+
"specificity_ci": [
|
| 151 |
+
0.8756218905472631,
|
| 152 |
+
0.8756218905472631
|
| 153 |
+
],
|
| 154 |
+
"npv_ci": [
|
| 155 |
+
0.9301878222768437,
|
| 156 |
+
0.9301878222768437
|
| 157 |
+
],
|
| 158 |
+
"class_weights": {
|
| 159 |
+
"0.0": 0.951077943615257,
|
| 160 |
+
"1.0": 1.0542279411764706
|
| 161 |
+
}
|
| 162 |
+
},
|
| 163 |
+
"severe_toxic": {
|
| 164 |
+
"auc": 0.9499761279127715,
|
| 165 |
+
"threshold": 0.03537772223353386,
|
| 166 |
+
"precision": 0.8608043862269837,
|
| 167 |
+
"recall": 0.9492385786802037,
|
| 168 |
+
"f1": 0.9028611452277716,
|
| 169 |
+
"specificity": 0.8465042131632855,
|
| 170 |
+
"npv": 0.9434265401805545,
|
| 171 |
+
"positive_samples": 197,
|
| 172 |
+
"true_positives": 2177,
|
| 173 |
+
"false_positives": 352,
|
| 174 |
+
"true_negatives": 1941,
|
| 175 |
+
"false_negatives": 116,
|
| 176 |
+
"auc_ci": [
|
| 177 |
+
0.9499761279127715,
|
| 178 |
+
0.9499761279127715
|
| 179 |
+
],
|
| 180 |
+
"precision_ci": [
|
| 181 |
+
0.8608043862269837,
|
| 182 |
+
0.8608043862269837
|
| 183 |
+
],
|
| 184 |
+
"recall_ci": [
|
| 185 |
+
0.9492385786802037,
|
| 186 |
+
0.9492385786802037
|
| 187 |
+
],
|
| 188 |
+
"f1_ci": [
|
| 189 |
+
0.9028611452277716,
|
| 190 |
+
0.9028611452277716
|
| 191 |
+
],
|
| 192 |
+
"specificity_ci": [
|
| 193 |
+
0.8465042131632855,
|
| 194 |
+
0.8465042131632855
|
| 195 |
+
],
|
| 196 |
+
"npv_ci": [
|
| 197 |
+
0.9434265401805545,
|
| 198 |
+
0.9434265401805545
|
| 199 |
+
],
|
| 200 |
+
"class_weights": {
|
| 201 |
+
"0.0": 0.5224322477795491,
|
| 202 |
+
"1.0": 11.644670050761421
|
| 203 |
+
}
|
| 204 |
+
},
|
| 205 |
+
"obscene": {
|
| 206 |
+
"auc": 0.9572805958351019,
|
| 207 |
+
"threshold": 0.2777131497859955,
|
| 208 |
+
"precision": 0.8724828332798461,
|
| 209 |
+
"recall": 0.9115977291159771,
|
| 210 |
+
"f1": 0.8916114958872817,
|
| 211 |
+
"specificity": 0.8667660208643849,
|
| 212 |
+
"npv": 0.9074484866722257,
|
| 213 |
+
"positive_samples": 1233,
|
| 214 |
+
"true_positives": 2091,
|
| 215 |
+
"false_positives": 305,
|
| 216 |
+
"true_negatives": 1988,
|
| 217 |
+
"false_negatives": 202,
|
| 218 |
+
"auc_ci": [
|
| 219 |
+
0.9572805958351019,
|
| 220 |
+
0.9572805958351019
|
| 221 |
+
],
|
| 222 |
+
"precision_ci": [
|
| 223 |
+
0.8724828332798461,
|
| 224 |
+
0.8724828332798461
|
| 225 |
+
],
|
| 226 |
+
"recall_ci": [
|
| 227 |
+
0.9115977291159771,
|
| 228 |
+
0.9115977291159771
|
| 229 |
+
],
|
| 230 |
+
"f1_ci": [
|
| 231 |
+
0.8916114958872817,
|
| 232 |
+
0.8916114958872817
|
| 233 |
+
],
|
| 234 |
+
"specificity_ci": [
|
| 235 |
+
0.8667660208643849,
|
| 236 |
+
0.8667660208643849
|
| 237 |
+
],
|
| 238 |
+
"npv_ci": [
|
| 239 |
+
0.9074484866722257,
|
| 240 |
+
0.9074484866722257
|
| 241 |
+
],
|
| 242 |
+
"class_weights": {
|
| 243 |
+
"0.0": 0.6837555886736214,
|
| 244 |
+
"1.0": 1.8605028386050284
|
| 245 |
+
}
|
| 246 |
+
},
|
| 247 |
+
"threat": {
|
| 248 |
+
"auc": 0.9697358146798531,
|
| 249 |
+
"threshold": 0.016539234668016434,
|
| 250 |
+
"precision": 0.9045252081854022,
|
| 251 |
+
"recall": 0.9117647058823535,
|
| 252 |
+
"f1": 0.9081305291811165,
|
| 253 |
+
"specificity": 0.9037610619468958,
|
| 254 |
+
"npv": 0.9110528041980915,
|
| 255 |
+
"positive_samples": 68,
|
| 256 |
+
"true_positives": 2091,
|
| 257 |
+
"false_positives": 220,
|
| 258 |
+
"true_negatives": 2073,
|
| 259 |
+
"false_negatives": 202,
|
| 260 |
+
"auc_ci": [
|
| 261 |
+
0.9697358146798531,
|
| 262 |
+
0.9697358146798531
|
| 263 |
+
],
|
| 264 |
+
"precision_ci": [
|
| 265 |
+
0.9045252081854022,
|
| 266 |
+
0.9045252081854022
|
| 267 |
+
],
|
| 268 |
+
"recall_ci": [
|
| 269 |
+
0.9117647058823535,
|
| 270 |
+
0.9117647058823535
|
| 271 |
+
],
|
| 272 |
+
"f1_ci": [
|
| 273 |
+
0.9081305291811165,
|
| 274 |
+
0.9081305291811165
|
| 275 |
+
],
|
| 276 |
+
"specificity_ci": [
|
| 277 |
+
0.9037610619468958,
|
| 278 |
+
0.9037610619468958
|
| 279 |
+
],
|
| 280 |
+
"npv_ci": [
|
| 281 |
+
0.9110528041980915,
|
| 282 |
+
0.9110528041980915
|
| 283 |
+
],
|
| 284 |
+
"class_weights": {
|
| 285 |
+
"0.0": 0.5075221238938054,
|
| 286 |
+
"1.0": 33.73529411764706
|
| 287 |
+
}
|
| 288 |
+
},
|
| 289 |
+
"insult": {
|
| 290 |
+
"auc": 0.935014291573492,
|
| 291 |
+
"threshold": 0.25907590985298157,
|
| 292 |
+
"precision": 0.833978890287596,
|
| 293 |
+
"recall": 0.9098862642169729,
|
| 294 |
+
"f1": 0.8702805202104968,
|
| 295 |
+
"specificity": 0.8188679245282912,
|
| 296 |
+
"npv": 0.900862976980011,
|
| 297 |
+
"positive_samples": 1143,
|
| 298 |
+
"true_positives": 2087,
|
| 299 |
+
"false_positives": 415,
|
| 300 |
+
"true_negatives": 1878,
|
| 301 |
+
"false_negatives": 206,
|
| 302 |
+
"auc_ci": [
|
| 303 |
+
0.935014291573492,
|
| 304 |
+
0.935014291573492
|
| 305 |
+
],
|
| 306 |
+
"precision_ci": [
|
| 307 |
+
0.833978890287596,
|
| 308 |
+
0.833978890287596
|
| 309 |
+
],
|
| 310 |
+
"recall_ci": [
|
| 311 |
+
0.9098862642169729,
|
| 312 |
+
0.9098862642169729
|
| 313 |
+
],
|
| 314 |
+
"f1_ci": [
|
| 315 |
+
0.8702805202104968,
|
| 316 |
+
0.8702805202104968
|
| 317 |
+
],
|
| 318 |
+
"specificity_ci": [
|
| 319 |
+
0.8188679245282912,
|
| 320 |
+
0.8188679245282912
|
| 321 |
+
],
|
| 322 |
+
"npv_ci": [
|
| 323 |
+
0.900862976980011,
|
| 324 |
+
0.900862976980011
|
| 325 |
+
],
|
| 326 |
+
"class_weights": {
|
| 327 |
+
"0.0": 0.6658925979680697,
|
| 328 |
+
"1.0": 2.0069991251093615
|
| 329 |
+
}
|
| 330 |
+
},
|
| 331 |
+
"identity_hate": {
|
| 332 |
+
"auc": 0.9686336850292078,
|
| 333 |
+
"threshold": 0.026042653247714043,
|
| 334 |
+
"precision": 0.8623651962191886,
|
| 335 |
+
"recall": 0.9626168224299065,
|
| 336 |
+
"f1": 0.909737451082551,
|
| 337 |
+
"specificity": 0.8463648834019236,
|
| 338 |
+
"npv": 0.9576992819322562,
|
| 339 |
+
"positive_samples": 214,
|
| 340 |
+
"true_positives": 2208,
|
| 341 |
+
"false_positives": 352,
|
| 342 |
+
"true_negatives": 1941,
|
| 343 |
+
"false_negatives": 85,
|
| 344 |
+
"auc_ci": [
|
| 345 |
+
0.9686336850292078,
|
| 346 |
+
0.9686336850292078
|
| 347 |
+
],
|
| 348 |
+
"precision_ci": [
|
| 349 |
+
0.8623651962191886,
|
| 350 |
+
0.8623651962191886
|
| 351 |
+
],
|
| 352 |
+
"recall_ci": [
|
| 353 |
+
0.9626168224299065,
|
| 354 |
+
0.9626168224299065
|
| 355 |
+
],
|
| 356 |
+
"f1_ci": [
|
| 357 |
+
0.909737451082551,
|
| 358 |
+
0.909737451082551
|
| 359 |
+
],
|
| 360 |
+
"specificity_ci": [
|
| 361 |
+
0.8463648834019236,
|
| 362 |
+
0.8463648834019236
|
| 363 |
+
],
|
| 364 |
+
"npv_ci": [
|
| 365 |
+
0.9576992819322562,
|
| 366 |
+
0.9576992819322562
|
| 367 |
+
],
|
| 368 |
+
"class_weights": {
|
| 369 |
+
"0.0": 0.5244627343392776,
|
| 370 |
+
"1.0": 10.719626168224298
|
| 371 |
+
}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"sample_count": 4588
|
| 375 |
+
},
|
| 376 |
+
"1": {
|
| 377 |
+
"auc": 0.9420109561343032,
|
| 378 |
+
"precision": 0.7054445371054338,
|
| 379 |
+
"recall": 0.8937771830043493,
|
| 380 |
+
"f1": 0.7655260008199765,
|
| 381 |
+
"hamming_loss": 0.16467680852429553,
|
| 382 |
+
"exact_match": 0.49354900828037745,
|
| 383 |
+
"specificity": 0.8275039240639036,
|
| 384 |
+
"class_metrics": {
|
| 385 |
+
"toxic": {
|
| 386 |
+
"auc": 0.970066021237747,
|
| 387 |
+
"threshold": 0.44148319959640503,
|
| 388 |
+
"precision": 0.9051201281749973,
|
| 389 |
+
"recall": 0.916216216216217,
|
| 390 |
+
"f1": 0.910634371966946,
|
| 391 |
+
"specificity": 0.903956972723781,
|
| 392 |
+
"npv": 0.9151763423430814,
|
| 393 |
+
"positive_samples": 2590,
|
| 394 |
+
"true_positives": 2378,
|
| 395 |
+
"false_positives": 249,
|
| 396 |
+
"true_negatives": 2347,
|
| 397 |
+
"false_negatives": 217,
|
| 398 |
+
"auc_ci": [
|
| 399 |
+
0.970066021237747,
|
| 400 |
+
0.970066021237747
|
| 401 |
+
],
|
| 402 |
+
"precision_ci": [
|
| 403 |
+
0.9051201281749973,
|
| 404 |
+
0.9051201281749973
|
| 405 |
+
],
|
| 406 |
+
"recall_ci": [
|
| 407 |
+
0.916216216216217,
|
| 408 |
+
0.916216216216217
|
| 409 |
+
],
|
| 410 |
+
"f1_ci": [
|
| 411 |
+
0.910634371966946,
|
| 412 |
+
0.910634371966946
|
| 413 |
+
],
|
| 414 |
+
"specificity_ci": [
|
| 415 |
+
0.903956972723781,
|
| 416 |
+
0.903956972723781
|
| 417 |
+
],
|
| 418 |
+
"npv_ci": [
|
| 419 |
+
0.9151763423430814,
|
| 420 |
+
0.9151763423430814
|
| 421 |
+
],
|
| 422 |
+
"class_weights": {
|
| 423 |
+
"0.0": 0.9975028812908183,
|
| 424 |
+
"1.0": 1.0025096525096524
|
| 425 |
+
}
|
| 426 |
+
},
|
| 427 |
+
"severe_toxic": {
|
| 428 |
+
"auc": 0.9032119421376688,
|
| 429 |
+
"threshold": 0.03648429363965988,
|
| 430 |
+
"precision": 0.8147008122253235,
|
| 431 |
+
"recall": 0.8688524590163955,
|
| 432 |
+
"f1": 0.8409057392553343,
|
| 433 |
+
"specificity": 0.8023843200646473,
|
| 434 |
+
"npv": 0.8595146599106457,
|
| 435 |
+
"positive_samples": 244,
|
| 436 |
+
"true_positives": 2255,
|
| 437 |
+
"false_positives": 513,
|
| 438 |
+
"true_negatives": 2083,
|
| 439 |
+
"false_negatives": 340,
|
| 440 |
+
"auc_ci": [
|
| 441 |
+
0.9032119421376688,
|
| 442 |
+
0.9032119421376688
|
| 443 |
+
],
|
| 444 |
+
"precision_ci": [
|
| 445 |
+
0.8147008122253235,
|
| 446 |
+
0.8147008122253235
|
| 447 |
+
],
|
| 448 |
+
"recall_ci": [
|
| 449 |
+
0.8688524590163955,
|
| 450 |
+
0.8688524590163955
|
| 451 |
+
],
|
| 452 |
+
"f1_ci": [
|
| 453 |
+
0.8409057392553343,
|
| 454 |
+
0.8409057392553343
|
| 455 |
+
],
|
| 456 |
+
"specificity_ci": [
|
| 457 |
+
0.8023843200646473,
|
| 458 |
+
0.8023843200646473
|
| 459 |
+
],
|
| 460 |
+
"npv_ci": [
|
| 461 |
+
0.8595146599106457,
|
| 462 |
+
0.8595146599106457
|
| 463 |
+
],
|
| 464 |
+
"class_weights": {
|
| 465 |
+
"0.0": 0.5246514447363103,
|
| 466 |
+
"1.0": 10.64139344262295
|
| 467 |
+
}
|
| 468 |
+
},
|
| 469 |
+
"obscene": {
|
| 470 |
+
"auc": 0.9387485218400086,
|
| 471 |
+
"threshold": 0.1990610957145691,
|
| 472 |
+
"precision": 0.8573644543610149,
|
| 473 |
+
"recall": 0.8723747980614001,
|
| 474 |
+
"f1": 0.8648044977770555,
|
| 475 |
+
"specificity": 0.8548672566371623,
|
| 476 |
+
"npv": 0.8701005785595336,
|
| 477 |
+
"positive_samples": 1238,
|
| 478 |
+
"true_positives": 2265,
|
| 479 |
+
"false_positives": 376,
|
| 480 |
+
"true_negatives": 2219,
|
| 481 |
+
"false_negatives": 331,
|
| 482 |
+
"auc_ci": [
|
| 483 |
+
0.9387485218400086,
|
| 484 |
+
0.9387485218400086
|
| 485 |
+
],
|
| 486 |
+
"precision_ci": [
|
| 487 |
+
0.8573644543610149,
|
| 488 |
+
0.8573644543610149
|
| 489 |
+
],
|
| 490 |
+
"recall_ci": [
|
| 491 |
+
0.8723747980614001,
|
| 492 |
+
0.8723747980614001
|
| 493 |
+
],
|
| 494 |
+
"f1_ci": [
|
| 495 |
+
0.8648044977770555,
|
| 496 |
+
0.8648044977770555
|
| 497 |
+
],
|
| 498 |
+
"specificity_ci": [
|
| 499 |
+
0.8548672566371623,
|
| 500 |
+
0.8548672566371623
|
| 501 |
+
],
|
| 502 |
+
"npv_ci": [
|
| 503 |
+
0.8701005785595336,
|
| 504 |
+
0.8701005785595336
|
| 505 |
+
],
|
| 506 |
+
"class_weights": {
|
| 507 |
+
"0.0": 0.6565107458912769,
|
| 508 |
+
"1.0": 2.097334410339257
|
| 509 |
+
}
|
| 510 |
+
},
|
| 511 |
+
"threat": {
|
| 512 |
+
"auc": 0.930141945247047,
|
| 513 |
+
"threshold": 0.012619060464203358,
|
| 514 |
+
"precision": 0.8505847769217403,
|
| 515 |
+
"recall": 0.8773584905660369,
|
| 516 |
+
"f1": 0.8637642103418028,
|
| 517 |
+
"specificity": 0.8458816591311225,
|
| 518 |
+
"npv": 0.8733726632315268,
|
| 519 |
+
"positive_samples": 106,
|
| 520 |
+
"true_positives": 2278,
|
| 521 |
+
"false_positives": 400,
|
| 522 |
+
"true_negatives": 2196,
|
| 523 |
+
"false_negatives": 318,
|
| 524 |
+
"auc_ci": [
|
| 525 |
+
0.930141945247047,
|
| 526 |
+
0.930141945247047
|
| 527 |
+
],
|
| 528 |
+
"precision_ci": [
|
| 529 |
+
0.8505847769217403,
|
| 530 |
+
0.8505847769217403
|
| 531 |
+
],
|
| 532 |
+
"recall_ci": [
|
| 533 |
+
0.8773584905660369,
|
| 534 |
+
0.8773584905660369
|
| 535 |
+
],
|
| 536 |
+
"f1_ci": [
|
| 537 |
+
0.8637642103418028,
|
| 538 |
+
0.8637642103418028
|
| 539 |
+
],
|
| 540 |
+
"specificity_ci": [
|
| 541 |
+
0.8458816591311225,
|
| 542 |
+
0.8458816591311225
|
| 543 |
+
],
|
| 544 |
+
"npv_ci": [
|
| 545 |
+
0.8733726632315268,
|
| 546 |
+
0.8733726632315268
|
| 547 |
+
],
|
| 548 |
+
"class_weights": {
|
| 549 |
+
"0.0": 0.5104187143699627,
|
| 550 |
+
"1.0": 24.495283018867923
|
| 551 |
+
}
|
| 552 |
+
},
|
| 553 |
+
"insult": {
|
| 554 |
+
"auc": 0.9116567628368878,
|
| 555 |
+
"threshold": 0.24214455485343933,
|
| 556 |
+
"precision": 0.8063856025869378,
|
| 557 |
+
"recall": 0.8794466403162026,
|
| 558 |
+
"f1": 0.8413329522908936,
|
| 559 |
+
"specificity": 0.7888435374149729,
|
| 560 |
+
"npv": 0.8674359236672227,
|
| 561 |
+
"positive_samples": 1518,
|
| 562 |
+
"true_positives": 2283,
|
| 563 |
+
"false_positives": 548,
|
| 564 |
+
"true_negatives": 2048,
|
| 565 |
+
"false_negatives": 313,
|
| 566 |
+
"auc_ci": [
|
| 567 |
+
0.9116567628368878,
|
| 568 |
+
0.9116567628368878
|
| 569 |
+
],
|
| 570 |
+
"precision_ci": [
|
| 571 |
+
0.8063856025869378,
|
| 572 |
+
0.8063856025869378
|
| 573 |
+
],
|
| 574 |
+
"recall_ci": [
|
| 575 |
+
0.8794466403162026,
|
| 576 |
+
0.8794466403162026
|
| 577 |
+
],
|
| 578 |
+
"f1_ci": [
|
| 579 |
+
0.8413329522908936,
|
| 580 |
+
0.8413329522908936
|
| 581 |
+
],
|
| 582 |
+
"specificity_ci": [
|
| 583 |
+
0.7888435374149729,
|
| 584 |
+
0.7888435374149729
|
| 585 |
+
],
|
| 586 |
+
"npv_ci": [
|
| 587 |
+
0.8674359236672227,
|
| 588 |
+
0.8674359236672227
|
| 589 |
+
],
|
| 590 |
+
"class_weights": {
|
| 591 |
+
"0.0": 0.706530612244898,
|
| 592 |
+
"1.0": 1.7104743083003953
|
| 593 |
+
}
|
| 594 |
+
},
|
| 595 |
+
"identity_hate": {
|
| 596 |
+
"auc": 0.9000925697269513,
|
| 597 |
+
"threshold": 0.03167847916483879,
|
| 598 |
+
"precision": 0.7933569321076599,
|
| 599 |
+
"recall": 0.8865248226950354,
|
| 600 |
+
"f1": 0.8373572860825882,
|
| 601 |
+
"specificity": 0.7690897984117396,
|
| 602 |
+
"npv": 0.8714256962068888,
|
| 603 |
+
"positive_samples": 282,
|
| 604 |
+
"true_positives": 2301,
|
| 605 |
+
"false_positives": 599,
|
| 606 |
+
"true_negatives": 1996,
|
| 607 |
+
"false_negatives": 294,
|
| 608 |
+
"auc_ci": [
|
| 609 |
+
0.9000925697269513,
|
| 610 |
+
0.9000925697269513
|
| 611 |
+
],
|
| 612 |
+
"precision_ci": [
|
| 613 |
+
0.7933569321076599,
|
| 614 |
+
0.7933569321076599
|
| 615 |
+
],
|
| 616 |
+
"recall_ci": [
|
| 617 |
+
0.8865248226950354,
|
| 618 |
+
0.8865248226950354
|
| 619 |
+
],
|
| 620 |
+
"f1_ci": [
|
| 621 |
+
0.8373572860825882,
|
| 622 |
+
0.8373572860825882
|
| 623 |
+
],
|
| 624 |
+
"specificity_ci": [
|
| 625 |
+
0.7690897984117396,
|
| 626 |
+
0.7690897984117396
|
| 627 |
+
],
|
| 628 |
+
"npv_ci": [
|
| 629 |
+
0.8714256962068888,
|
| 630 |
+
0.8714256962068888
|
| 631 |
+
],
|
| 632 |
+
"class_weights": {
|
| 633 |
+
"0.0": 0.5287110568112401,
|
| 634 |
+
"1.0": 9.207446808510639
|
| 635 |
+
}
|
| 636 |
+
}
|
| 637 |
+
},
|
| 638 |
+
"sample_count": 5193
|
| 639 |
+
},
|
| 640 |
+
"2": {
|
| 641 |
+
"auc": 0.9291857688264461,
|
| 642 |
+
"precision": 0.6563281876729908,
|
| 643 |
+
"recall": 0.9071871335232032,
|
| 644 |
+
"f1": 0.7348671832220326,
|
| 645 |
+
"hamming_loss": 0.20595261153076377,
|
| 646 |
+
"exact_match": 0.4263025372845245,
|
| 647 |
+
"specificity": 0.7733622212755961,
|
| 648 |
+
"class_metrics": {
|
| 649 |
+
"toxic": {
|
| 650 |
+
"auc": 0.962186696069825,
|
| 651 |
+
"threshold": 0.3978160321712494,
|
| 652 |
+
"precision": 0.8937958373522624,
|
| 653 |
+
"recall": 0.9136996904024615,
|
| 654 |
+
"f1": 0.9036381748465286,
|
| 655 |
+
"specificity": 0.8914307871267977,
|
| 656 |
+
"npv": 0.9117341057406776,
|
| 657 |
+
"positive_samples": 2584,
|
| 658 |
+
"true_positives": 2358,
|
| 659 |
+
"false_positives": 280,
|
| 660 |
+
"true_negatives": 2301,
|
| 661 |
+
"false_negatives": 222,
|
| 662 |
+
"auc_ci": [
|
| 663 |
+
0.962186696069825,
|
| 664 |
+
0.962186696069825
|
| 665 |
+
],
|
| 666 |
+
"precision_ci": [
|
| 667 |
+
0.8937958373522624,
|
| 668 |
+
0.8937958373522624
|
| 669 |
+
],
|
| 670 |
+
"recall_ci": [
|
| 671 |
+
0.9136996904024615,
|
| 672 |
+
0.9136996904024615
|
| 673 |
+
],
|
| 674 |
+
"f1_ci": [
|
| 675 |
+
0.9036381748465286,
|
| 676 |
+
0.9036381748465286
|
| 677 |
+
],
|
| 678 |
+
"specificity_ci": [
|
| 679 |
+
0.8914307871267977,
|
| 680 |
+
0.8914307871267977
|
| 681 |
+
],
|
| 682 |
+
"npv_ci": [
|
| 683 |
+
0.9117341057406776,
|
| 684 |
+
0.9117341057406776
|
| 685 |
+
],
|
| 686 |
+
"class_weights": {
|
| 687 |
+
"0.0": 1.0009693679720821,
|
| 688 |
+
"1.0": 0.9990325077399381
|
| 689 |
+
}
|
| 690 |
+
},
|
| 691 |
+
"severe_toxic": {
|
| 692 |
+
"auc": 0.890519864426667,
|
| 693 |
+
"threshold": 0.015000982210040092,
|
| 694 |
+
"precision": 0.7460680730510791,
|
| 695 |
+
"recall": 0.918032786885247,
|
| 696 |
+
"f1": 0.8231651924456013,
|
| 697 |
+
"specificity": 0.6875381175035498,
|
| 698 |
+
"npv": 0.8934806428840502,
|
| 699 |
+
"positive_samples": 244,
|
| 700 |
+
"true_positives": 2369,
|
| 701 |
+
"false_positives": 806,
|
| 702 |
+
"true_negatives": 1774,
|
| 703 |
+
"false_negatives": 211,
|
| 704 |
+
"auc_ci": [
|
| 705 |
+
0.890519864426667,
|
| 706 |
+
0.890519864426667
|
| 707 |
+
],
|
| 708 |
+
"precision_ci": [
|
| 709 |
+
0.7460680730510791,
|
| 710 |
+
0.7460680730510791
|
| 711 |
+
],
|
| 712 |
+
"recall_ci": [
|
| 713 |
+
0.918032786885247,
|
| 714 |
+
0.918032786885247
|
| 715 |
+
],
|
| 716 |
+
"f1_ci": [
|
| 717 |
+
0.8231651924456013,
|
| 718 |
+
0.8231651924456013
|
| 719 |
+
],
|
| 720 |
+
"specificity_ci": [
|
| 721 |
+
0.6875381175035498,
|
| 722 |
+
0.6875381175035498
|
| 723 |
+
],
|
| 724 |
+
"npv_ci": [
|
| 725 |
+
0.8934806428840502,
|
| 726 |
+
0.8934806428840502
|
| 727 |
+
],
|
| 728 |
+
"class_weights": {
|
| 729 |
+
"0.0": 0.5248017889815003,
|
| 730 |
+
"1.0": 10.579918032786885
|
| 731 |
+
}
|
| 732 |
+
},
|
| 733 |
+
"obscene": {
|
| 734 |
+
"auc": 0.9233059279915251,
|
| 735 |
+
"threshold": 0.11362762749195099,
|
| 736 |
+
"precision": 0.7873800414823968,
|
| 737 |
+
"recall": 0.9095315024232634,
|
| 738 |
+
"f1": 0.8440592612850891,
|
| 739 |
+
"specificity": 0.7543949044586057,
|
| 740 |
+
"npv": 0.892919379205219,
|
| 741 |
+
"positive_samples": 1238,
|
| 742 |
+
"true_positives": 2347,
|
| 743 |
+
"false_positives": 634,
|
| 744 |
+
"true_negatives": 1947,
|
| 745 |
+
"false_negatives": 233,
|
| 746 |
+
"auc_ci": [
|
| 747 |
+
0.9233059279915251,
|
| 748 |
+
0.9233059279915251
|
| 749 |
+
],
|
| 750 |
+
"precision_ci": [
|
| 751 |
+
0.7873800414823968,
|
| 752 |
+
0.7873800414823968
|
| 753 |
+
],
|
| 754 |
+
"recall_ci": [
|
| 755 |
+
0.9095315024232634,
|
| 756 |
+
0.9095315024232634
|
| 757 |
+
],
|
| 758 |
+
"f1_ci": [
|
| 759 |
+
0.8440592612850891,
|
| 760 |
+
0.8440592612850891
|
| 761 |
+
],
|
| 762 |
+
"specificity_ci": [
|
| 763 |
+
0.7543949044586057,
|
| 764 |
+
0.7543949044586057
|
| 765 |
+
],
|
| 766 |
+
"npv_ci": [
|
| 767 |
+
0.892919379205219,
|
| 768 |
+
0.892919379205219
|
| 769 |
+
],
|
| 770 |
+
"class_weights": {
|
| 771 |
+
"0.0": 0.6577070063694268,
|
| 772 |
+
"1.0": 2.0852180936995155
|
| 773 |
+
}
|
| 774 |
+
},
|
| 775 |
+
"threat": {
|
| 776 |
+
"auc": 0.848578598380765,
|
| 777 |
+
"threshold": 0.008195769973099232,
|
| 778 |
+
"precision": 0.7785886139481758,
|
| 779 |
+
"recall": 0.8055555555555555,
|
| 780 |
+
"f1": 0.791842555156752,
|
| 781 |
+
"specificity": 0.7709198813056214,
|
| 782 |
+
"npv": 0.7985792107105536,
|
| 783 |
+
"positive_samples": 108,
|
| 784 |
+
"true_positives": 2079,
|
| 785 |
+
"false_positives": 591,
|
| 786 |
+
"true_negatives": 1990,
|
| 787 |
+
"false_negatives": 501,
|
| 788 |
+
"auc_ci": [
|
| 789 |
+
0.848578598380765,
|
| 790 |
+
0.848578598380765
|
| 791 |
+
],
|
| 792 |
+
"precision_ci": [
|
| 793 |
+
0.7785886139481758,
|
| 794 |
+
0.7785886139481758
|
| 795 |
+
],
|
| 796 |
+
"recall_ci": [
|
| 797 |
+
0.8055555555555555,
|
| 798 |
+
0.8055555555555555
|
| 799 |
+
],
|
| 800 |
+
"f1_ci": [
|
| 801 |
+
0.791842555156752,
|
| 802 |
+
0.791842555156752
|
| 803 |
+
],
|
| 804 |
+
"specificity_ci": [
|
| 805 |
+
0.7709198813056214,
|
| 806 |
+
0.7709198813056214
|
| 807 |
+
],
|
| 808 |
+
"npv_ci": [
|
| 809 |
+
0.7985792107105536,
|
| 810 |
+
0.7985792107105536
|
| 811 |
+
],
|
| 812 |
+
"class_weights": {
|
| 813 |
+
"0.0": 0.5106824925816024,
|
| 814 |
+
"1.0": 23.90277777777778
|
| 815 |
+
}
|
| 816 |
+
},
|
| 817 |
+
"insult": {
|
| 818 |
+
"auc": 0.8943137096607889,
|
| 819 |
+
"threshold": 0.1587354838848114,
|
| 820 |
+
"precision": 0.7484673378377763,
|
| 821 |
+
"recall": 0.9141347424042362,
|
| 822 |
+
"f1": 0.8230472043830551,
|
| 823 |
+
"specificity": 0.6927925459029957,
|
| 824 |
+
"npv": 0.889726581805318,
|
| 825 |
+
"positive_samples": 1514,
|
| 826 |
+
"true_positives": 2359,
|
| 827 |
+
"false_positives": 793,
|
| 828 |
+
"true_negatives": 1788,
|
| 829 |
+
"false_negatives": 221,
|
| 830 |
+
"auc_ci": [
|
| 831 |
+
0.8943137096607889,
|
| 832 |
+
0.8943137096607889
|
| 833 |
+
],
|
| 834 |
+
"precision_ci": [
|
| 835 |
+
0.7484673378377763,
|
| 836 |
+
0.7484673378377763
|
| 837 |
+
],
|
| 838 |
+
"recall_ci": [
|
| 839 |
+
0.9141347424042362,
|
| 840 |
+
0.9141347424042362
|
| 841 |
+
],
|
| 842 |
+
"f1_ci": [
|
| 843 |
+
0.8230472043830551,
|
| 844 |
+
0.8230472043830551
|
| 845 |
+
],
|
| 846 |
+
"specificity_ci": [
|
| 847 |
+
0.6927925459029957,
|
| 848 |
+
0.6927925459029957
|
| 849 |
+
],
|
| 850 |
+
"npv_ci": [
|
| 851 |
+
0.889726581805318,
|
| 852 |
+
0.889726581805318
|
| 853 |
+
],
|
| 854 |
+
"class_weights": {
|
| 855 |
+
"0.0": 0.7074540970128802,
|
| 856 |
+
"1.0": 1.7050858652575958
|
| 857 |
+
}
|
| 858 |
+
},
|
| 859 |
+
"identity_hate": {
|
| 860 |
+
"auc": 0.9040654827596841,
|
| 861 |
+
"threshold": 0.0467526838183403,
|
| 862 |
+
"precision": 0.8408828817107497,
|
| 863 |
+
"recall": 0.8291814946619218,
|
| 864 |
+
"f1": 0.8349911950184066,
|
| 865 |
+
"specificity": 0.8430970913560043,
|
| 866 |
+
"npv": 0.8315259121222329,
|
| 867 |
+
"positive_samples": 281,
|
| 868 |
+
"true_positives": 2140,
|
| 869 |
+
"false_positives": 405,
|
| 870 |
+
"true_negatives": 2176,
|
| 871 |
+
"false_negatives": 440,
|
| 872 |
+
"auc_ci": [
|
| 873 |
+
0.9040654827596841,
|
| 874 |
+
0.9040654827596841
|
| 875 |
+
],
|
| 876 |
+
"precision_ci": [
|
| 877 |
+
0.8408828817107497,
|
| 878 |
+
0.8408828817107497
|
| 879 |
+
],
|
| 880 |
+
"recall_ci": [
|
| 881 |
+
0.8291814946619218,
|
| 882 |
+
0.8291814946619218
|
| 883 |
+
],
|
| 884 |
+
"f1_ci": [
|
| 885 |
+
0.8349911950184066,
|
| 886 |
+
0.8349911950184066
|
| 887 |
+
],
|
| 888 |
+
"specificity_ci": [
|
| 889 |
+
0.8430970913560043,
|
| 890 |
+
0.8430970913560043
|
| 891 |
+
],
|
| 892 |
+
"npv_ci": [
|
| 893 |
+
0.8315259121222329,
|
| 894 |
+
0.8315259121222329
|
| 895 |
+
],
|
| 896 |
+
"class_weights": {
|
| 897 |
+
"0.0": 0.5287791888570258,
|
| 898 |
+
"1.0": 9.186832740213523
|
| 899 |
+
}
|
| 900 |
+
}
|
| 901 |
+
},
|
| 902 |
+
"sample_count": 5163
|
| 903 |
+
},
|
| 904 |
+
"3": {
|
| 905 |
+
"auc": 0.9472472410532857,
|
| 906 |
+
"precision": 0.6982701786686969,
|
| 907 |
+
"recall": 0.9152656355077337,
|
| 908 |
+
"f1": 0.7674148586410611,
|
| 909 |
+
"hamming_loss": 0.1731811145510836,
|
| 910 |
+
"exact_match": 0.48471362229102166,
|
| 911 |
+
"specificity": 0.8133241121366614,
|
| 912 |
+
"class_metrics": {
|
| 913 |
+
"toxic": {
|
| 914 |
+
"auc": 0.9747483574660619,
|
| 915 |
+
"threshold": 0.5033379793167114,
|
| 916 |
+
"precision": 0.9204374197691823,
|
| 917 |
+
"recall": 0.9294300116324036,
|
| 918 |
+
"f1": 0.9249118582673775,
|
| 919 |
+
"specificity": 0.9196601004248757,
|
| 920 |
+
"npv": 0.9287337466652424,
|
| 921 |
+
"positive_samples": 2579,
|
| 922 |
+
"true_positives": 2401,
|
| 923 |
+
"false_positives": 207,
|
| 924 |
+
"true_negatives": 2376,
|
| 925 |
+
"false_negatives": 182,
|
| 926 |
+
"auc_ci": [
|
| 927 |
+
0.9747483574660619,
|
| 928 |
+
0.9747483574660619
|
| 929 |
+
],
|
| 930 |
+
"precision_ci": [
|
| 931 |
+
0.9204374197691823,
|
| 932 |
+
0.9204374197691823
|
| 933 |
+
],
|
| 934 |
+
"recall_ci": [
|
| 935 |
+
0.9294300116324036,
|
| 936 |
+
0.9294300116324036
|
| 937 |
+
],
|
| 938 |
+
"f1_ci": [
|
| 939 |
+
0.9249118582673775,
|
| 940 |
+
0.9249118582673775
|
| 941 |
+
],
|
| 942 |
+
"specificity_ci": [
|
| 943 |
+
0.9196601004248757,
|
| 944 |
+
0.9196601004248757
|
| 945 |
+
],
|
| 946 |
+
"npv_ci": [
|
| 947 |
+
0.9287337466652424,
|
| 948 |
+
0.9287337466652424
|
| 949 |
+
],
|
| 950 |
+
"class_weights": {
|
| 951 |
+
"0.0": 0.9980687524140595,
|
| 952 |
+
"1.0": 1.0019387359441645
|
| 953 |
+
}
|
| 954 |
+
},
|
| 955 |
+
"severe_toxic": {
|
| 956 |
+
"auc": 0.9073687265747961,
|
| 957 |
+
"threshold": 0.021415209397673607,
|
| 958 |
+
"precision": 0.7618540559183846,
|
| 959 |
+
"recall": 0.93388429752066,
|
| 960 |
+
"f1": 0.8391430651806406,
|
| 961 |
+
"specificity": 0.7080795777506993,
|
| 962 |
+
"npv": 0.9146007419992344,
|
| 963 |
+
"positive_samples": 242,
|
| 964 |
+
"true_positives": 2413,
|
| 965 |
+
"false_positives": 754,
|
| 966 |
+
"true_negatives": 1829,
|
| 967 |
+
"false_negatives": 170,
|
| 968 |
+
"auc_ci": [
|
| 969 |
+
0.9073687265747961,
|
| 970 |
+
0.9073687265747961
|
| 971 |
+
],
|
| 972 |
+
"precision_ci": [
|
| 973 |
+
0.7618540559183846,
|
| 974 |
+
0.7618540559183846
|
| 975 |
+
],
|
| 976 |
+
"recall_ci": [
|
| 977 |
+
0.93388429752066,
|
| 978 |
+
0.93388429752066
|
| 979 |
+
],
|
| 980 |
+
"f1_ci": [
|
| 981 |
+
0.8391430651806406,
|
| 982 |
+
0.8391430651806406
|
| 983 |
+
],
|
| 984 |
+
"specificity_ci": [
|
| 985 |
+
0.7080795777506993,
|
| 986 |
+
0.7080795777506993
|
| 987 |
+
],
|
| 988 |
+
"npv_ci": [
|
| 989 |
+
0.9146007419992344,
|
| 990 |
+
0.9146007419992344
|
| 991 |
+
],
|
| 992 |
+
"class_weights": {
|
| 993 |
+
"0.0": 0.5245635403978888,
|
| 994 |
+
"1.0": 10.677685950413224
|
| 995 |
+
}
|
| 996 |
+
},
|
| 997 |
+
"obscene": {
|
| 998 |
+
"auc": 0.9429228614622618,
|
| 999 |
+
"threshold": 0.14896434545516968,
|
| 1000 |
+
"precision": 0.822101549733319,
|
| 1001 |
+
"recall": 0.9148418491484125,
|
| 1002 |
+
"f1": 0.8659958665665364,
|
| 1003 |
+
"specificity": 0.8020330368488026,
|
| 1004 |
+
"npv": 0.9040137548341648,
|
| 1005 |
+
"positive_samples": 1233,
|
| 1006 |
+
"true_positives": 2363,
|
| 1007 |
+
"false_positives": 511,
|
| 1008 |
+
"true_negatives": 2072,
|
| 1009 |
+
"false_negatives": 220,
|
| 1010 |
+
"auc_ci": [
|
| 1011 |
+
0.9429228614622618,
|
| 1012 |
+
0.9429228614622618
|
| 1013 |
+
],
|
| 1014 |
+
"precision_ci": [
|
| 1015 |
+
0.822101549733319,
|
| 1016 |
+
0.822101549733319
|
| 1017 |
+
],
|
| 1018 |
+
"recall_ci": [
|
| 1019 |
+
0.9148418491484125,
|
| 1020 |
+
0.9148418491484125
|
| 1021 |
+
],
|
| 1022 |
+
"f1_ci": [
|
| 1023 |
+
0.8659958665665364,
|
| 1024 |
+
0.8659958665665364
|
| 1025 |
+
],
|
| 1026 |
+
"specificity_ci": [
|
| 1027 |
+
0.8020330368488026,
|
| 1028 |
+
0.8020330368488026
|
| 1029 |
+
],
|
| 1030 |
+
"npv_ci": [
|
| 1031 |
+
0.9040137548341648,
|
| 1032 |
+
0.9040137548341648
|
| 1033 |
+
],
|
| 1034 |
+
"class_weights": {
|
| 1035 |
+
"0.0": 0.6566709021601016,
|
| 1036 |
+
"1.0": 2.095701540957015
|
| 1037 |
+
}
|
| 1038 |
+
},
|
| 1039 |
+
"threat": {
|
| 1040 |
+
"auc": 0.8985232762406729,
|
| 1041 |
+
"threshold": 0.013273251242935658,
|
| 1042 |
+
"precision": 0.8299773755655987,
|
| 1043 |
+
"recall": 0.8055555555555544,
|
| 1044 |
+
"f1": 0.8175841319366995,
|
| 1045 |
+
"specificity": 0.8349802371541444,
|
| 1046 |
+
"npv": 0.8111134812286639,
|
| 1047 |
+
"positive_samples": 108,
|
| 1048 |
+
"true_positives": 2081,
|
| 1049 |
+
"false_positives": 426,
|
| 1050 |
+
"true_negatives": 2157,
|
| 1051 |
+
"false_negatives": 502,
|
| 1052 |
+
"auc_ci": [
|
| 1053 |
+
0.8985232762406729,
|
| 1054 |
+
0.8985232762406729
|
| 1055 |
+
],
|
| 1056 |
+
"precision_ci": [
|
| 1057 |
+
0.8299773755655987,
|
| 1058 |
+
0.8299773755655987
|
| 1059 |
+
],
|
| 1060 |
+
"recall_ci": [
|
| 1061 |
+
0.8055555555555544,
|
| 1062 |
+
0.8055555555555544
|
| 1063 |
+
],
|
| 1064 |
+
"f1_ci": [
|
| 1065 |
+
0.8175841319366995,
|
| 1066 |
+
0.8175841319366995
|
| 1067 |
+
],
|
| 1068 |
+
"specificity_ci": [
|
| 1069 |
+
0.8349802371541444,
|
| 1070 |
+
0.8349802371541444
|
| 1071 |
+
],
|
| 1072 |
+
"npv_ci": [
|
| 1073 |
+
0.8111134812286639,
|
| 1074 |
+
0.8111134812286639
|
| 1075 |
+
],
|
| 1076 |
+
"class_weights": {
|
| 1077 |
+
"0.0": 0.5106719367588933,
|
| 1078 |
+
"1.0": 23.925925925925927
|
| 1079 |
+
}
|
| 1080 |
+
},
|
| 1081 |
+
"insult": {
|
| 1082 |
+
"auc": 0.9178884966596437,
|
| 1083 |
+
"threshold": 0.22368550300598145,
|
| 1084 |
+
"precision": 0.8017937840347082,
|
| 1085 |
+
"recall": 0.9065606361828928,
|
| 1086 |
+
"f1": 0.8509647346472855,
|
| 1087 |
+
"specificity": 0.7758950532932412,
|
| 1088 |
+
"npv": 0.8925162032262658,
|
| 1089 |
+
"positive_samples": 1509,
|
| 1090 |
+
"true_positives": 2342,
|
| 1091 |
+
"false_positives": 579,
|
| 1092 |
+
"true_negatives": 2004,
|
| 1093 |
+
"false_negatives": 241,
|
| 1094 |
+
"auc_ci": [
|
| 1095 |
+
0.9178884966596437,
|
| 1096 |
+
0.9178884966596437
|
| 1097 |
+
],
|
| 1098 |
+
"precision_ci": [
|
| 1099 |
+
0.8017937840347082,
|
| 1100 |
+
0.8017937840347082
|
| 1101 |
+
],
|
| 1102 |
+
"recall_ci": [
|
| 1103 |
+
0.9065606361828928,
|
| 1104 |
+
0.9065606361828928
|
| 1105 |
+
],
|
| 1106 |
+
"f1_ci": [
|
| 1107 |
+
0.8509647346472855,
|
| 1108 |
+
0.8509647346472855
|
| 1109 |
+
],
|
| 1110 |
+
"specificity_ci": [
|
| 1111 |
+
0.7758950532932412,
|
| 1112 |
+
0.7758950532932412
|
| 1113 |
+
],
|
| 1114 |
+
"npv_ci": [
|
| 1115 |
+
0.8925162032262658,
|
| 1116 |
+
0.8925162032262658
|
| 1117 |
+
],
|
| 1118 |
+
"class_weights": {
|
| 1119 |
+
"0.0": 0.70620388084176,
|
| 1120 |
+
"1.0": 1.7123923127899272
|
| 1121 |
+
}
|
| 1122 |
+
},
|
| 1123 |
+
"identity_hate": {
|
| 1124 |
+
"auc": 0.9242209406948756,
|
| 1125 |
+
"threshold": 0.042373284697532654,
|
| 1126 |
+
"precision": 0.8424336725093711,
|
| 1127 |
+
"recall": 0.8592057761732879,
|
| 1128 |
+
"f1": 0.8507370677416805,
|
| 1129 |
+
"specificity": 0.839296667348186,
|
| 1130 |
+
"npv": 0.8563457480377756,
|
| 1131 |
+
"positive_samples": 277,
|
| 1132 |
+
"true_positives": 2220,
|
| 1133 |
+
"false_positives": 415,
|
| 1134 |
+
"true_negatives": 2168,
|
| 1135 |
+
"false_negatives": 363,
|
| 1136 |
+
"auc_ci": [
|
| 1137 |
+
0.9242209406948756,
|
| 1138 |
+
0.9242209406948756
|
| 1139 |
+
],
|
| 1140 |
+
"precision_ci": [
|
| 1141 |
+
0.8424336725093711,
|
| 1142 |
+
0.8424336725093711
|
| 1143 |
+
],
|
| 1144 |
+
"recall_ci": [
|
| 1145 |
+
0.8592057761732879,
|
| 1146 |
+
0.8592057761732879
|
| 1147 |
+
],
|
| 1148 |
+
"f1_ci": [
|
| 1149 |
+
0.8507370677416805,
|
| 1150 |
+
0.8507370677416805
|
| 1151 |
+
],
|
| 1152 |
+
"specificity_ci": [
|
| 1153 |
+
0.839296667348186,
|
| 1154 |
+
0.839296667348186
|
| 1155 |
+
],
|
| 1156 |
+
"npv_ci": [
|
| 1157 |
+
0.8563457480377756,
|
| 1158 |
+
0.8563457480377756
|
| 1159 |
+
],
|
| 1160 |
+
"class_weights": {
|
| 1161 |
+
"0.0": 0.5283173175219792,
|
| 1162 |
+
"1.0": 9.328519855595667
|
| 1163 |
+
}
|
| 1164 |
+
}
|
| 1165 |
+
},
|
| 1166 |
+
"sample_count": 5168
|
| 1167 |
+
},
|
| 1168 |
+
"4": {
|
| 1169 |
+
"auc": 0.9418392933687934,
|
| 1170 |
+
"precision": 0.7019672150256779,
|
| 1171 |
+
"recall": 0.9036673990197736,
|
| 1172 |
+
"f1": 0.766375554274002,
|
| 1173 |
+
"hamming_loss": 0.1651803024428073,
|
| 1174 |
+
"exact_match": 0.4955409073284219,
|
| 1175 |
+
"specificity": 0.8245338509682739,
|
| 1176 |
+
"class_metrics": {
|
| 1177 |
+
"toxic": {
|
| 1178 |
+
"auc": 0.9718317503718501,
|
| 1179 |
+
"threshold": 0.4544762372970581,
|
| 1180 |
+
"precision": 0.9205380327767301,
|
| 1181 |
+
"recall": 0.9217594394705978,
|
| 1182 |
+
"f1": 0.9211483312394544,
|
| 1183 |
+
"specificity": 0.9204325994592514,
|
| 1184 |
+
"npv": 0.9216554888385321,
|
| 1185 |
+
"positive_samples": 2569,
|
| 1186 |
+
"true_positives": 2377,
|
| 1187 |
+
"false_positives": 205,
|
| 1188 |
+
"true_negatives": 2373,
|
| 1189 |
+
"false_negatives": 201,
|
| 1190 |
+
"auc_ci": [
|
| 1191 |
+
0.9718317503718501,
|
| 1192 |
+
0.9718317503718501
|
| 1193 |
+
],
|
| 1194 |
+
"precision_ci": [
|
| 1195 |
+
0.9205380327767301,
|
| 1196 |
+
0.9205380327767301
|
| 1197 |
+
],
|
| 1198 |
+
"recall_ci": [
|
| 1199 |
+
0.9217594394705978,
|
| 1200 |
+
0.9217594394705978
|
| 1201 |
+
],
|
| 1202 |
+
"f1_ci": [
|
| 1203 |
+
0.9211483312394544,
|
| 1204 |
+
0.9211483312394544
|
| 1205 |
+
],
|
| 1206 |
+
"specificity_ci": [
|
| 1207 |
+
0.9204325994592514,
|
| 1208 |
+
0.9204325994592514
|
| 1209 |
+
],
|
| 1210 |
+
"npv_ci": [
|
| 1211 |
+
0.9216554888385321,
|
| 1212 |
+
0.9216554888385321
|
| 1213 |
+
],
|
| 1214 |
+
"class_weights": {
|
| 1215 |
+
"0.0": 0.9961375048281189,
|
| 1216 |
+
"1.0": 1.003892565200467
|
| 1217 |
+
}
|
| 1218 |
+
},
|
| 1219 |
+
"severe_toxic": {
|
| 1220 |
+
"auc": 0.8962662667751142,
|
| 1221 |
+
"threshold": 0.0307308342307806,
|
| 1222 |
+
"precision": 0.7913182428501319,
|
| 1223 |
+
"recall": 0.8458333333333329,
|
| 1224 |
+
"f1": 0.8176681460830066,
|
| 1225 |
+
"specificity": 0.7769418462789687,
|
| 1226 |
+
"npv": 0.834426745622858,
|
| 1227 |
+
"positive_samples": 240,
|
| 1228 |
+
"true_positives": 2181,
|
| 1229 |
+
"false_positives": 575,
|
| 1230 |
+
"true_negatives": 2003,
|
| 1231 |
+
"false_negatives": 397,
|
| 1232 |
+
"auc_ci": [
|
| 1233 |
+
0.8962662667751142,
|
| 1234 |
+
0.8962662667751142
|
| 1235 |
+
],
|
| 1236 |
+
"precision_ci": [
|
| 1237 |
+
0.7913182428501319,
|
| 1238 |
+
0.7913182428501319
|
| 1239 |
+
],
|
| 1240 |
+
"recall_ci": [
|
| 1241 |
+
0.8458333333333329,
|
| 1242 |
+
0.8458333333333329
|
| 1243 |
+
],
|
| 1244 |
+
"f1_ci": [
|
| 1245 |
+
0.8176681460830066,
|
| 1246 |
+
0.8176681460830066
|
| 1247 |
+
],
|
| 1248 |
+
"specificity_ci": [
|
| 1249 |
+
0.7769418462789687,
|
| 1250 |
+
0.7769418462789687
|
| 1251 |
+
],
|
| 1252 |
+
"npv_ci": [
|
| 1253 |
+
0.834426745622858,
|
| 1254 |
+
0.834426745622858
|
| 1255 |
+
],
|
| 1256 |
+
"class_weights": {
|
| 1257 |
+
"0.0": 0.5244001626677511,
|
| 1258 |
+
"1.0": 10.745833333333334
|
| 1259 |
+
}
|
| 1260 |
+
},
|
| 1261 |
+
"obscene": {
|
| 1262 |
+
"auc": 0.9401245966951454,
|
| 1263 |
+
"threshold": 0.1775909662246704,
|
| 1264 |
+
"precision": 0.8495468615216861,
|
| 1265 |
+
"recall": 0.8913398692810475,
|
| 1266 |
+
"f1": 0.8699417085541208,
|
| 1267 |
+
"specificity": 0.8421453990848948,
|
| 1268 |
+
"npv": 0.8857178178787266,
|
| 1269 |
+
"positive_samples": 1224,
|
| 1270 |
+
"true_positives": 2298,
|
| 1271 |
+
"false_positives": 407,
|
| 1272 |
+
"true_negatives": 2171,
|
| 1273 |
+
"false_negatives": 280,
|
| 1274 |
+
"auc_ci": [
|
| 1275 |
+
0.9401245966951454,
|
| 1276 |
+
0.9401245966951454
|
| 1277 |
+
],
|
| 1278 |
+
"precision_ci": [
|
| 1279 |
+
0.8495468615216861,
|
| 1280 |
+
0.8495468615216861
|
| 1281 |
+
],
|
| 1282 |
+
"recall_ci": [
|
| 1283 |
+
0.8913398692810475,
|
| 1284 |
+
0.8913398692810475
|
| 1285 |
+
],
|
| 1286 |
+
"f1_ci": [
|
| 1287 |
+
0.8699417085541208,
|
| 1288 |
+
0.8699417085541208
|
| 1289 |
+
],
|
| 1290 |
+
"specificity_ci": [
|
| 1291 |
+
0.8421453990848948,
|
| 1292 |
+
0.8421453990848948
|
| 1293 |
+
],
|
| 1294 |
+
"npv_ci": [
|
| 1295 |
+
0.8857178178787266,
|
| 1296 |
+
0.8857178178787266
|
| 1297 |
+
],
|
| 1298 |
+
"class_weights": {
|
| 1299 |
+
"0.0": 0.6555668530757499,
|
| 1300 |
+
"1.0": 2.1070261437908497
|
| 1301 |
+
}
|
| 1302 |
+
},
|
| 1303 |
+
"threat": {
|
| 1304 |
+
"auc": 0.8861722579224652,
|
| 1305 |
+
"threshold": 0.014509523287415504,
|
| 1306 |
+
"precision": 0.841106024006686,
|
| 1307 |
+
"recall": 0.7943925233644874,
|
| 1308 |
+
"f1": 0.81708215259711,
|
| 1309 |
+
"specificity": 0.8499307067907416,
|
| 1310 |
+
"npv": 0.8052107636996033,
|
| 1311 |
+
"positive_samples": 107,
|
| 1312 |
+
"true_positives": 2048,
|
| 1313 |
+
"false_positives": 387,
|
| 1314 |
+
"true_negatives": 2191,
|
| 1315 |
+
"false_negatives": 530,
|
| 1316 |
+
"auc_ci": [
|
| 1317 |
+
0.8861722579224652,
|
| 1318 |
+
0.8861722579224652
|
| 1319 |
+
],
|
| 1320 |
+
"precision_ci": [
|
| 1321 |
+
0.841106024006686,
|
| 1322 |
+
0.841106024006686
|
| 1323 |
+
],
|
| 1324 |
+
"recall_ci": [
|
| 1325 |
+
0.7943925233644874,
|
| 1326 |
+
0.7943925233644874
|
| 1327 |
+
],
|
| 1328 |
+
"f1_ci": [
|
| 1329 |
+
0.81708215259711,
|
| 1330 |
+
0.81708215259711
|
| 1331 |
+
],
|
| 1332 |
+
"specificity_ci": [
|
| 1333 |
+
0.8499307067907416,
|
| 1334 |
+
0.8499307067907416
|
| 1335 |
+
],
|
| 1336 |
+
"npv_ci": [
|
| 1337 |
+
0.8052107636996033,
|
| 1338 |
+
0.8052107636996033
|
| 1339 |
+
],
|
| 1340 |
+
"class_weights": {
|
| 1341 |
+
"0.0": 0.5105919619877252,
|
| 1342 |
+
"1.0": 24.102803738317757
|
| 1343 |
+
}
|
| 1344 |
+
},
|
| 1345 |
+
"insult": {
|
| 1346 |
+
"auc": 0.908347099690273,
|
| 1347 |
+
"threshold": 0.19917058944702148,
|
| 1348 |
+
"precision": 0.787211545222267,
|
| 1349 |
+
"recall": 0.9028609447771131,
|
| 1350 |
+
"f1": 0.8410793781503274,
|
| 1351 |
+
"specificity": 0.755950752393989,
|
| 1352 |
+
"npv": 0.8861326740097348,
|
| 1353 |
+
"positive_samples": 1503,
|
| 1354 |
+
"true_positives": 2328,
|
| 1355 |
+
"false_positives": 629,
|
| 1356 |
+
"true_negatives": 1949,
|
| 1357 |
+
"false_negatives": 250,
|
| 1358 |
+
"auc_ci": [
|
| 1359 |
+
0.908347099690273,
|
| 1360 |
+
0.908347099690273
|
| 1361 |
+
],
|
| 1362 |
+
"precision_ci": [
|
| 1363 |
+
0.787211545222267,
|
| 1364 |
+
0.787211545222267
|
| 1365 |
+
],
|
| 1366 |
+
"recall_ci": [
|
| 1367 |
+
0.9028609447771131,
|
| 1368 |
+
0.9028609447771131
|
| 1369 |
+
],
|
| 1370 |
+
"f1_ci": [
|
| 1371 |
+
0.8410793781503274,
|
| 1372 |
+
0.8410793781503274
|
| 1373 |
+
],
|
| 1374 |
+
"specificity_ci": [
|
| 1375 |
+
0.755950752393989,
|
| 1376 |
+
0.755950752393989
|
| 1377 |
+
],
|
| 1378 |
+
"npv_ci": [
|
| 1379 |
+
0.8861326740097348,
|
| 1380 |
+
0.8861326740097348
|
| 1381 |
+
],
|
| 1382 |
+
"class_weights": {
|
| 1383 |
+
"0.0": 0.7056087551299589,
|
| 1384 |
+
"1.0": 1.7159015302727878
|
| 1385 |
+
}
|
| 1386 |
+
},
|
| 1387 |
+
"identity_hate": {
|
| 1388 |
+
"auc": 0.9136671508934288,
|
| 1389 |
+
"threshold": 0.031982019543647766,
|
| 1390 |
+
"precision": 0.8173388685191341,
|
| 1391 |
+
"recall": 0.8868613138686137,
|
| 1392 |
+
"f1": 0.8506820152960648,
|
| 1393 |
+
"specificity": 0.801801801801802,
|
| 1394 |
+
"npv": 0.8763431199913764,
|
| 1395 |
+
"positive_samples": 274,
|
| 1396 |
+
"true_positives": 2287,
|
| 1397 |
+
"false_positives": 511,
|
| 1398 |
+
"true_negatives": 2067,
|
| 1399 |
+
"false_negatives": 291,
|
| 1400 |
+
"auc_ci": [
|
| 1401 |
+
0.9136671508934288,
|
| 1402 |
+
0.9136671508934288
|
| 1403 |
+
],
|
| 1404 |
+
"precision_ci": [
|
| 1405 |
+
0.8173388685191341,
|
| 1406 |
+
0.8173388685191341
|
| 1407 |
+
],
|
| 1408 |
+
"recall_ci": [
|
| 1409 |
+
0.8868613138686137,
|
| 1410 |
+
0.8868613138686137
|
| 1411 |
+
],
|
| 1412 |
+
"f1_ci": [
|
| 1413 |
+
0.8506820152960648,
|
| 1414 |
+
0.8506820152960648
|
| 1415 |
+
],
|
| 1416 |
+
"specificity_ci": [
|
| 1417 |
+
0.801801801801802,
|
| 1418 |
+
0.801801801801802
|
| 1419 |
+
],
|
| 1420 |
+
"npv_ci": [
|
| 1421 |
+
0.8763431199913764,
|
| 1422 |
+
0.8763431199913764
|
| 1423 |
+
],
|
| 1424 |
+
"class_weights": {
|
| 1425 |
+
"0.0": 0.528050778050778,
|
| 1426 |
+
"1.0": 9.412408759124087
|
| 1427 |
+
}
|
| 1428 |
+
}
|
| 1429 |
+
},
|
| 1430 |
+
"sample_count": 5158
|
| 1431 |
+
},
|
| 1432 |
+
"5": {
|
| 1433 |
+
"auc": 0.9460152147041221,
|
| 1434 |
+
"precision": 0.7347347983801011,
|
| 1435 |
+
"recall": 0.8867510548523206,
|
| 1436 |
+
"f1": 0.7840490209789418,
|
| 1437 |
+
"hamming_loss": 0.13677289804378806,
|
| 1438 |
+
"exact_match": 0.5347842984842596,
|
| 1439 |
+
"specificity": 0.8623489178772902,
|
| 1440 |
+
"class_metrics": {
|
| 1441 |
+
"toxic": {
|
| 1442 |
+
"auc": 0.9757415342563065,
|
| 1443 |
+
"threshold": 0.5313886404037476,
|
| 1444 |
+
"precision": 0.9310023292772915,
|
| 1445 |
+
"recall": 0.9121306376360682,
|
| 1446 |
+
"f1": 0.9214698705828952,
|
| 1447 |
+
"specificity": 0.9324009324009348,
|
| 1448 |
+
"npv": 0.9138763886248709,
|
| 1449 |
+
"positive_samples": 2572,
|
| 1450 |
+
"true_positives": 2346,
|
| 1451 |
+
"false_positives": 173,
|
| 1452 |
+
"true_negatives": 2399,
|
| 1453 |
+
"false_negatives": 226,
|
| 1454 |
+
"auc_ci": [
|
| 1455 |
+
0.9757415342563065,
|
| 1456 |
+
0.9757415342563065
|
| 1457 |
+
],
|
| 1458 |
+
"precision_ci": [
|
| 1459 |
+
0.9310023292772915,
|
| 1460 |
+
0.9310023292772915
|
| 1461 |
+
],
|
| 1462 |
+
"recall_ci": [
|
| 1463 |
+
0.9121306376360682,
|
| 1464 |
+
0.9121306376360682
|
| 1465 |
+
],
|
| 1466 |
+
"f1_ci": [
|
| 1467 |
+
0.9214698705828952,
|
| 1468 |
+
0.9214698705828952
|
| 1469 |
+
],
|
| 1470 |
+
"specificity_ci": [
|
| 1471 |
+
0.9324009324009348,
|
| 1472 |
+
0.9324009324009348
|
| 1473 |
+
],
|
| 1474 |
+
"npv_ci": [
|
| 1475 |
+
0.9138763886248709,
|
| 1476 |
+
0.9138763886248709
|
| 1477 |
+
],
|
| 1478 |
+
"class_weights": {
|
| 1479 |
+
"0.0": 0.9996114996114996,
|
| 1480 |
+
"1.0": 1.0003888024883358
|
| 1481 |
+
}
|
| 1482 |
+
},
|
| 1483 |
+
"severe_toxic": {
|
| 1484 |
+
"auc": 0.9032281899714669,
|
| 1485 |
+
"threshold": 0.05001964047551155,
|
| 1486 |
+
"precision": 0.8240547826417868,
|
| 1487 |
+
"recall": 0.8458333333333334,
|
| 1488 |
+
"f1": 0.8348020409069885,
|
| 1489 |
+
"specificity": 0.8194048104362093,
|
| 1490 |
+
"npv": 0.8416483326674401,
|
| 1491 |
+
"positive_samples": 240,
|
| 1492 |
+
"true_positives": 2176,
|
| 1493 |
+
"false_positives": 464,
|
| 1494 |
+
"true_negatives": 2108,
|
| 1495 |
+
"false_negatives": 396,
|
| 1496 |
+
"auc_ci": [
|
| 1497 |
+
0.9032281899714669,
|
| 1498 |
+
0.9032281899714669
|
| 1499 |
+
],
|
| 1500 |
+
"precision_ci": [
|
| 1501 |
+
0.8240547826417868,
|
| 1502 |
+
0.8240547826417868
|
| 1503 |
+
],
|
| 1504 |
+
"recall_ci": [
|
| 1505 |
+
0.8458333333333334,
|
| 1506 |
+
0.8458333333333334
|
| 1507 |
+
],
|
| 1508 |
+
"f1_ci": [
|
| 1509 |
+
0.8348020409069885,
|
| 1510 |
+
0.8348020409069885
|
| 1511 |
+
],
|
| 1512 |
+
"specificity_ci": [
|
| 1513 |
+
0.8194048104362093,
|
| 1514 |
+
0.8194048104362093
|
| 1515 |
+
],
|
| 1516 |
+
"npv_ci": [
|
| 1517 |
+
0.8416483326674401,
|
| 1518 |
+
0.8416483326674401
|
| 1519 |
+
],
|
| 1520 |
+
"class_weights": {
|
| 1521 |
+
"0.0": 0.5244598450876478,
|
| 1522 |
+
"1.0": 10.720833333333333
|
| 1523 |
+
}
|
| 1524 |
+
},
|
| 1525 |
+
"obscene": {
|
| 1526 |
+
"auc": 0.9399297347094935,
|
| 1527 |
+
"threshold": 0.20134443044662476,
|
| 1528 |
+
"precision": 0.8638120606436712,
|
| 1529 |
+
"recall": 0.8799999999999917,
|
| 1530 |
+
"f1": 0.8718308933886383,
|
| 1531 |
+
"specificity": 0.8612598826829971,
|
| 1532 |
+
"npv": 0.8777082380338568,
|
| 1533 |
+
"positive_samples": 1225,
|
| 1534 |
+
"true_positives": 2264,
|
| 1535 |
+
"false_positives": 356,
|
| 1536 |
+
"true_negatives": 2216,
|
| 1537 |
+
"false_negatives": 308,
|
| 1538 |
+
"auc_ci": [
|
| 1539 |
+
0.9399297347094935,
|
| 1540 |
+
0.9399297347094935
|
| 1541 |
+
],
|
| 1542 |
+
"precision_ci": [
|
| 1543 |
+
0.8638120606436712,
|
| 1544 |
+
0.8638120606436712
|
| 1545 |
+
],
|
| 1546 |
+
"recall_ci": [
|
| 1547 |
+
0.8799999999999917,
|
| 1548 |
+
0.8799999999999917
|
| 1549 |
+
],
|
| 1550 |
+
"f1_ci": [
|
| 1551 |
+
0.8718308933886383,
|
| 1552 |
+
0.8718308933886383
|
| 1553 |
+
],
|
| 1554 |
+
"specificity_ci": [
|
| 1555 |
+
0.8612598826829971,
|
| 1556 |
+
0.8612598826829971
|
| 1557 |
+
],
|
| 1558 |
+
"npv_ci": [
|
| 1559 |
+
0.8777082380338568,
|
| 1560 |
+
0.8777082380338568
|
| 1561 |
+
],
|
| 1562 |
+
"class_weights": {
|
| 1563 |
+
"0.0": 0.6562101504718184,
|
| 1564 |
+
"1.0": 2.100408163265306
|
| 1565 |
+
}
|
| 1566 |
+
},
|
| 1567 |
+
"threat": {
|
| 1568 |
+
"auc": 0.8786647405643102,
|
| 1569 |
+
"threshold": 0.018557138741016388,
|
| 1570 |
+
"precision": 0.8659949024954022,
|
| 1571 |
+
"recall": 0.8055555555555568,
|
| 1572 |
+
"f1": 0.834682556458845,
|
| 1573 |
+
"specificity": 0.8753473600635171,
|
| 1574 |
+
"npv": 0.8182408543184921,
|
| 1575 |
+
"positive_samples": 108,
|
| 1576 |
+
"true_positives": 2072,
|
| 1577 |
+
"false_positives": 320,
|
| 1578 |
+
"true_negatives": 2252,
|
| 1579 |
+
"false_negatives": 500,
|
| 1580 |
+
"auc_ci": [
|
| 1581 |
+
0.8786647405643102,
|
| 1582 |
+
0.8786647405643102
|
| 1583 |
+
],
|
| 1584 |
+
"precision_ci": [
|
| 1585 |
+
0.8659949024954022,
|
| 1586 |
+
0.8659949024954022
|
| 1587 |
+
],
|
| 1588 |
+
"recall_ci": [
|
| 1589 |
+
0.8055555555555568,
|
| 1590 |
+
0.8055555555555568
|
| 1591 |
+
],
|
| 1592 |
+
"f1_ci": [
|
| 1593 |
+
0.834682556458845,
|
| 1594 |
+
0.834682556458845
|
| 1595 |
+
],
|
| 1596 |
+
"specificity_ci": [
|
| 1597 |
+
0.8753473600635171,
|
| 1598 |
+
0.8753473600635171
|
| 1599 |
+
],
|
| 1600 |
+
"npv_ci": [
|
| 1601 |
+
0.8182408543184921,
|
| 1602 |
+
0.8182408543184921
|
| 1603 |
+
],
|
| 1604 |
+
"class_weights": {
|
| 1605 |
+
"0.0": 0.5107185391028186,
|
| 1606 |
+
"1.0": 23.824074074074073
|
| 1607 |
+
}
|
| 1608 |
+
},
|
| 1609 |
+
"insult": {
|
| 1610 |
+
"auc": 0.9170891169219639,
|
| 1611 |
+
"threshold": 0.32249945402145386,
|
| 1612 |
+
"precision": 0.8355108316117581,
|
| 1613 |
+
"recall": 0.8716755319149065,
|
| 1614 |
+
"f1": 0.8532101288125946,
|
| 1615 |
+
"specificity": 0.8283909939593549,
|
| 1616 |
+
"npv": 0.8658697667424693,
|
| 1617 |
+
"positive_samples": 1504,
|
| 1618 |
+
"true_positives": 2242,
|
| 1619 |
+
"false_positives": 441,
|
| 1620 |
+
"true_negatives": 2131,
|
| 1621 |
+
"false_negatives": 330,
|
| 1622 |
+
"auc_ci": [
|
| 1623 |
+
0.9170891169219639,
|
| 1624 |
+
0.9170891169219639
|
| 1625 |
+
],
|
| 1626 |
+
"precision_ci": [
|
| 1627 |
+
0.8355108316117581,
|
| 1628 |
+
0.8355108316117581
|
| 1629 |
+
],
|
| 1630 |
+
"recall_ci": [
|
| 1631 |
+
0.8716755319149065,
|
| 1632 |
+
0.8716755319149065
|
| 1633 |
+
],
|
| 1634 |
+
"f1_ci": [
|
| 1635 |
+
0.8532101288125946,
|
| 1636 |
+
0.8532101288125946
|
| 1637 |
+
],
|
| 1638 |
+
"specificity_ci": [
|
| 1639 |
+
0.8283909939593549,
|
| 1640 |
+
0.8283909939593549
|
| 1641 |
+
],
|
| 1642 |
+
"npv_ci": [
|
| 1643 |
+
0.8658697667424693,
|
| 1644 |
+
0.8658697667424693
|
| 1645 |
+
],
|
| 1646 |
+
"class_weights": {
|
| 1647 |
+
"0.0": 0.7064799560680944,
|
| 1648 |
+
"1.0": 1.7107712765957446
|
| 1649 |
+
}
|
| 1650 |
+
},
|
| 1651 |
+
"identity_hate": {
|
| 1652 |
+
"auc": 0.9171971252566641,
|
| 1653 |
+
"threshold": 0.055891502648591995,
|
| 1654 |
+
"precision": 0.8532420335871026,
|
| 1655 |
+
"recall": 0.829710144927536,
|
| 1656 |
+
"f1": 0.8413115718720496,
|
| 1657 |
+
"specificity": 0.8572895277207252,
|
| 1658 |
+
"npv": 0.8342805841339561,
|
| 1659 |
+
"positive_samples": 276,
|
| 1660 |
+
"true_positives": 2134,
|
| 1661 |
+
"false_positives": 367,
|
| 1662 |
+
"true_negatives": 2205,
|
| 1663 |
+
"false_negatives": 438,
|
| 1664 |
+
"auc_ci": [
|
| 1665 |
+
0.9171971252566641,
|
| 1666 |
+
0.9171971252566641
|
| 1667 |
+
],
|
| 1668 |
+
"precision_ci": [
|
| 1669 |
+
0.8532420335871026,
|
| 1670 |
+
0.8532420335871026
|
| 1671 |
+
],
|
| 1672 |
+
"recall_ci": [
|
| 1673 |
+
0.829710144927536,
|
| 1674 |
+
0.829710144927536
|
| 1675 |
+
],
|
| 1676 |
+
"f1_ci": [
|
| 1677 |
+
0.8413115718720496,
|
| 1678 |
+
0.8413115718720496
|
| 1679 |
+
],
|
| 1680 |
+
"specificity_ci": [
|
| 1681 |
+
0.8572895277207252,
|
| 1682 |
+
0.8572895277207252
|
| 1683 |
+
],
|
| 1684 |
+
"npv_ci": [
|
| 1685 |
+
0.8342805841339561,
|
| 1686 |
+
0.8342805841339561
|
| 1687 |
+
],
|
| 1688 |
+
"class_weights": {
|
| 1689 |
+
"0.0": 0.5283367556468173,
|
| 1690 |
+
"1.0": 9.322463768115941
|
| 1691 |
+
}
|
| 1692 |
+
}
|
| 1693 |
+
},
|
| 1694 |
+
"sample_count": 5146
|
| 1695 |
+
},
|
| 1696 |
+
"6": {
|
| 1697 |
+
"auc": 0.9462815482574403,
|
| 1698 |
+
"precision": 0.7134961462135606,
|
| 1699 |
+
"recall": 0.9073793914943687,
|
| 1700 |
+
"f1": 0.7744642816056855,
|
| 1701 |
+
"hamming_loss": 0.15539933230611197,
|
| 1702 |
+
"exact_match": 0.5132896764252697,
|
| 1703 |
+
"specificity": 0.8360743701752594,
|
| 1704 |
+
"class_metrics": {
|
| 1705 |
+
"toxic": {
|
| 1706 |
+
"auc": 0.9780732995232411,
|
| 1707 |
+
"threshold": 0.5710838437080383,
|
| 1708 |
+
"precision": 0.9379357119021944,
|
| 1709 |
+
"recall": 0.9243012422360248,
|
| 1710 |
+
"f1": 0.9310685643115885,
|
| 1711 |
+
"specificity": 0.9388379204893005,
|
| 1712 |
+
"npv": 0.9253858836387251,
|
| 1713 |
+
"positive_samples": 2576,
|
| 1714 |
+
"true_positives": 2399,
|
| 1715 |
+
"false_positives": 158,
|
| 1716 |
+
"true_negatives": 2437,
|
| 1717 |
+
"false_negatives": 196,
|
| 1718 |
+
"auc_ci": [
|
| 1719 |
+
0.9780732995232411,
|
| 1720 |
+
0.9780732995232411
|
| 1721 |
+
],
|
| 1722 |
+
"precision_ci": [
|
| 1723 |
+
0.9379357119021944,
|
| 1724 |
+
0.9379357119021944
|
| 1725 |
+
],
|
| 1726 |
+
"recall_ci": [
|
| 1727 |
+
0.9243012422360248,
|
| 1728 |
+
0.9243012422360248
|
| 1729 |
+
],
|
| 1730 |
+
"f1_ci": [
|
| 1731 |
+
0.9310685643115885,
|
| 1732 |
+
0.9310685643115885
|
| 1733 |
+
],
|
| 1734 |
+
"specificity_ci": [
|
| 1735 |
+
0.9388379204893005,
|
| 1736 |
+
0.9388379204893005
|
| 1737 |
+
],
|
| 1738 |
+
"npv_ci": [
|
| 1739 |
+
0.9253858836387251,
|
| 1740 |
+
0.9253858836387251
|
| 1741 |
+
],
|
| 1742 |
+
"class_weights": {
|
| 1743 |
+
"0.0": 0.9923547400611621,
|
| 1744 |
+
"1.0": 1.0077639751552796
|
| 1745 |
+
}
|
| 1746 |
+
},
|
| 1747 |
+
"severe_toxic": {
|
| 1748 |
+
"auc": 0.9067576592369966,
|
| 1749 |
+
"threshold": 0.023807251825928688,
|
| 1750 |
+
"precision": 0.7794259030353159,
|
| 1751 |
+
"recall": 0.9380165289256208,
|
| 1752 |
+
"f1": 0.8513989948241057,
|
| 1753 |
+
"specificity": 0.7345454545454645,
|
| 1754 |
+
"npv": 0.9221830255239729,
|
| 1755 |
+
"positive_samples": 242,
|
| 1756 |
+
"true_positives": 2435,
|
| 1757 |
+
"false_positives": 689,
|
| 1758 |
+
"true_negatives": 1906,
|
| 1759 |
+
"false_negatives": 160,
|
| 1760 |
+
"auc_ci": [
|
| 1761 |
+
0.9067576592369966,
|
| 1762 |
+
0.9067576592369966
|
| 1763 |
+
],
|
| 1764 |
+
"precision_ci": [
|
| 1765 |
+
0.7794259030353159,
|
| 1766 |
+
0.7794259030353159
|
| 1767 |
+
],
|
| 1768 |
+
"recall_ci": [
|
| 1769 |
+
0.9380165289256208,
|
| 1770 |
+
0.9380165289256208
|
| 1771 |
+
],
|
| 1772 |
+
"f1_ci": [
|
| 1773 |
+
0.8513989948241057,
|
| 1774 |
+
0.8513989948241057
|
| 1775 |
+
],
|
| 1776 |
+
"specificity_ci": [
|
| 1777 |
+
0.7345454545454645,
|
| 1778 |
+
0.7345454545454645
|
| 1779 |
+
],
|
| 1780 |
+
"npv_ci": [
|
| 1781 |
+
0.9221830255239729,
|
| 1782 |
+
0.9221830255239729
|
| 1783 |
+
],
|
| 1784 |
+
"class_weights": {
|
| 1785 |
+
"0.0": 0.5244444444444445,
|
| 1786 |
+
"1.0": 10.727272727272727
|
| 1787 |
+
}
|
| 1788 |
+
},
|
| 1789 |
+
"obscene": {
|
| 1790 |
+
"auc": 0.9375048626461102,
|
| 1791 |
+
"threshold": 0.14760328829288483,
|
| 1792 |
+
"precision": 0.8287449241470627,
|
| 1793 |
+
"recall": 0.9084278768233371,
|
| 1794 |
+
"f1": 0.8667588986547364,
|
| 1795 |
+
"specificity": 0.8122789287518954,
|
| 1796 |
+
"npv": 0.8986867106241987,
|
| 1797 |
+
"positive_samples": 1234,
|
| 1798 |
+
"true_positives": 2358,
|
| 1799 |
+
"false_positives": 487,
|
| 1800 |
+
"true_negatives": 2108,
|
| 1801 |
+
"false_negatives": 237,
|
| 1802 |
+
"auc_ci": [
|
| 1803 |
+
0.9375048626461102,
|
| 1804 |
+
0.9375048626461102
|
| 1805 |
+
],
|
| 1806 |
+
"precision_ci": [
|
| 1807 |
+
0.8287449241470627,
|
| 1808 |
+
0.8287449241470627
|
| 1809 |
+
],
|
| 1810 |
+
"recall_ci": [
|
| 1811 |
+
0.9084278768233371,
|
| 1812 |
+
0.9084278768233371
|
| 1813 |
+
],
|
| 1814 |
+
"f1_ci": [
|
| 1815 |
+
0.8667588986547364,
|
| 1816 |
+
0.8667588986547364
|
| 1817 |
+
],
|
| 1818 |
+
"specificity_ci": [
|
| 1819 |
+
0.8122789287518954,
|
| 1820 |
+
0.8122789287518954
|
| 1821 |
+
],
|
| 1822 |
+
"npv_ci": [
|
| 1823 |
+
0.8986867106241987,
|
| 1824 |
+
0.8986867106241987
|
| 1825 |
+
],
|
| 1826 |
+
"class_weights": {
|
| 1827 |
+
"0.0": 0.6558868115209702,
|
| 1828 |
+
"1.0": 2.1037277147487843
|
| 1829 |
+
}
|
| 1830 |
+
},
|
| 1831 |
+
"threat": {
|
| 1832 |
+
"auc": 0.9031869137455802,
|
| 1833 |
+
"threshold": 0.026773449033498764,
|
| 1834 |
+
"precision": 0.9112427696973145,
|
| 1835 |
+
"recall": 0.761467889908257,
|
| 1836 |
+
"f1": 0.8296498919893159,
|
| 1837 |
+
"specificity": 0.9258312020460328,
|
| 1838 |
+
"npv": 0.7951394486538688,
|
| 1839 |
+
"positive_samples": 109,
|
| 1840 |
+
"true_positives": 1976,
|
| 1841 |
+
"false_positives": 192,
|
| 1842 |
+
"true_negatives": 2403,
|
| 1843 |
+
"false_negatives": 619,
|
| 1844 |
+
"auc_ci": [
|
| 1845 |
+
0.9031869137455802,
|
| 1846 |
+
0.9031869137455802
|
| 1847 |
+
],
|
| 1848 |
+
"precision_ci": [
|
| 1849 |
+
0.9112427696973145,
|
| 1850 |
+
0.9112427696973145
|
| 1851 |
+
],
|
| 1852 |
+
"recall_ci": [
|
| 1853 |
+
0.761467889908257,
|
| 1854 |
+
0.761467889908257
|
| 1855 |
+
],
|
| 1856 |
+
"f1_ci": [
|
| 1857 |
+
0.8296498919893159,
|
| 1858 |
+
0.8296498919893159
|
| 1859 |
+
],
|
| 1860 |
+
"specificity_ci": [
|
| 1861 |
+
0.9258312020460328,
|
| 1862 |
+
0.9258312020460328
|
| 1863 |
+
],
|
| 1864 |
+
"npv_ci": [
|
| 1865 |
+
0.7951394486538688,
|
| 1866 |
+
0.7951394486538688
|
| 1867 |
+
],
|
| 1868 |
+
"class_weights": {
|
| 1869 |
+
"0.0": 0.5107220145583317,
|
| 1870 |
+
"1.0": 23.81651376146789
|
| 1871 |
+
}
|
| 1872 |
+
},
|
| 1873 |
+
"insult": {
|
| 1874 |
+
"auc": 0.9164838070297321,
|
| 1875 |
+
"threshold": 0.2600024938583374,
|
| 1876 |
+
"precision": 0.8178816065079044,
|
| 1877 |
+
"recall": 0.8940397350993466,
|
| 1878 |
+
"f1": 0.8542666500534941,
|
| 1879 |
+
"specificity": 0.8009234111895767,
|
| 1880 |
+
"npv": 0.8831600262588531,
|
| 1881 |
+
"positive_samples": 1510,
|
| 1882 |
+
"true_positives": 2320,
|
| 1883 |
+
"false_positives": 516,
|
| 1884 |
+
"true_negatives": 2079,
|
| 1885 |
+
"false_negatives": 275,
|
| 1886 |
+
"auc_ci": [
|
| 1887 |
+
0.9164838070297321,
|
| 1888 |
+
0.9164838070297321
|
| 1889 |
+
],
|
| 1890 |
+
"precision_ci": [
|
| 1891 |
+
0.8178816065079044,
|
| 1892 |
+
0.8178816065079044
|
| 1893 |
+
],
|
| 1894 |
+
"recall_ci": [
|
| 1895 |
+
0.8940397350993466,
|
| 1896 |
+
0.8940397350993466
|
| 1897 |
+
],
|
| 1898 |
+
"f1_ci": [
|
| 1899 |
+
0.8542666500534941,
|
| 1900 |
+
0.8542666500534941
|
| 1901 |
+
],
|
| 1902 |
+
"specificity_ci": [
|
| 1903 |
+
0.8009234111895767,
|
| 1904 |
+
0.8009234111895767
|
| 1905 |
+
],
|
| 1906 |
+
"npv_ci": [
|
| 1907 |
+
0.8831600262588531,
|
| 1908 |
+
0.8831600262588531
|
| 1909 |
+
],
|
| 1910 |
+
"class_weights": {
|
| 1911 |
+
"0.0": 0.7050516023900054,
|
| 1912 |
+
"1.0": 1.719205298013245
|
| 1913 |
+
}
|
| 1914 |
+
},
|
| 1915 |
+
"identity_hate": {
|
| 1916 |
+
"auc": 0.9038051609994096,
|
| 1917 |
+
"threshold": 0.03315547853708267,
|
| 1918 |
+
"precision": 0.8124487711378064,
|
| 1919 |
+
"recall": 0.8489208633093526,
|
| 1920 |
+
"f1": 0.8302844808144539,
|
| 1921 |
+
"specificity": 0.804029304029316,
|
| 1922 |
+
"npv": 0.8418199125360486,
|
| 1923 |
+
"positive_samples": 278,
|
| 1924 |
+
"true_positives": 2203,
|
| 1925 |
+
"false_positives": 508,
|
| 1926 |
+
"true_negatives": 2087,
|
| 1927 |
+
"false_negatives": 392,
|
| 1928 |
+
"auc_ci": [
|
| 1929 |
+
0.9038051609994096,
|
| 1930 |
+
0.9038051609994096
|
| 1931 |
+
],
|
| 1932 |
+
"precision_ci": [
|
| 1933 |
+
0.8124487711378064,
|
| 1934 |
+
0.8124487711378064
|
| 1935 |
+
],
|
| 1936 |
+
"recall_ci": [
|
| 1937 |
+
0.8489208633093526,
|
| 1938 |
+
0.8489208633093526
|
| 1939 |
+
],
|
| 1940 |
+
"f1_ci": [
|
| 1941 |
+
0.8302844808144539,
|
| 1942 |
+
0.8302844808144539
|
| 1943 |
+
],
|
| 1944 |
+
"specificity_ci": [
|
| 1945 |
+
0.804029304029316,
|
| 1946 |
+
0.804029304029316
|
| 1947 |
+
],
|
| 1948 |
+
"npv_ci": [
|
| 1949 |
+
0.8418199125360486,
|
| 1950 |
+
0.8418199125360486
|
| 1951 |
+
],
|
| 1952 |
+
"class_weights": {
|
| 1953 |
+
"0.0": 0.5282865282865283,
|
| 1954 |
+
"1.0": 9.338129496402878
|
| 1955 |
+
}
|
| 1956 |
+
}
|
| 1957 |
+
},
|
| 1958 |
+
"sample_count": 5192
|
| 1959 |
+
}
|
| 1960 |
+
},
|
| 1961 |
+
"per_class": {},
|
| 1962 |
+
"thresholds": {
|
| 1963 |
+
"0": {
|
| 1964 |
+
"toxic": 0.46047261357307434,
|
| 1965 |
+
"severe_toxic": 0.03537772223353386,
|
| 1966 |
+
"obscene": 0.2777131497859955,
|
| 1967 |
+
"threat": 0.016539234668016434,
|
| 1968 |
+
"insult": 0.25907590985298157,
|
| 1969 |
+
"identity_hate": 0.026042653247714043
|
| 1970 |
+
},
|
| 1971 |
+
"1": {
|
| 1972 |
+
"toxic": 0.44148319959640503,
|
| 1973 |
+
"severe_toxic": 0.03648429363965988,
|
| 1974 |
+
"obscene": 0.1990610957145691,
|
| 1975 |
+
"threat": 0.012619060464203358,
|
| 1976 |
+
"insult": 0.24214455485343933,
|
| 1977 |
+
"identity_hate": 0.03167847916483879
|
| 1978 |
+
},
|
| 1979 |
+
"2": {
|
| 1980 |
+
"toxic": 0.3978160321712494,
|
| 1981 |
+
"severe_toxic": 0.015000982210040092,
|
| 1982 |
+
"obscene": 0.11362762749195099,
|
| 1983 |
+
"threat": 0.008195769973099232,
|
| 1984 |
+
"insult": 0.1587354838848114,
|
| 1985 |
+
"identity_hate": 0.0467526838183403
|
| 1986 |
+
},
|
| 1987 |
+
"3": {
|
| 1988 |
+
"toxic": 0.5033379793167114,
|
| 1989 |
+
"severe_toxic": 0.021415209397673607,
|
| 1990 |
+
"obscene": 0.14896434545516968,
|
| 1991 |
+
"threat": 0.013273251242935658,
|
| 1992 |
+
"insult": 0.22368550300598145,
|
| 1993 |
+
"identity_hate": 0.042373284697532654
|
| 1994 |
+
},
|
| 1995 |
+
"4": {
|
| 1996 |
+
"toxic": 0.4544762372970581,
|
| 1997 |
+
"severe_toxic": 0.0307308342307806,
|
| 1998 |
+
"obscene": 0.1775909662246704,
|
| 1999 |
+
"threat": 0.014509523287415504,
|
| 2000 |
+
"insult": 0.19917058944702148,
|
| 2001 |
+
"identity_hate": 0.031982019543647766
|
| 2002 |
+
},
|
| 2003 |
+
"5": {
|
| 2004 |
+
"toxic": 0.5313886404037476,
|
| 2005 |
+
"severe_toxic": 0.05001964047551155,
|
| 2006 |
+
"obscene": 0.20134443044662476,
|
| 2007 |
+
"threat": 0.018557138741016388,
|
| 2008 |
+
"insult": 0.32249945402145386,
|
| 2009 |
+
"identity_hate": 0.055891502648591995
|
| 2010 |
+
},
|
| 2011 |
+
"6": {
|
| 2012 |
+
"toxic": 0.5710838437080383,
|
| 2013 |
+
"severe_toxic": 0.023807251825928688,
|
| 2014 |
+
"obscene": 0.14760328829288483,
|
| 2015 |
+
"threat": 0.026773449033498764,
|
| 2016 |
+
"insult": 0.2600024938583374,
|
| 2017 |
+
"identity_hate": 0.03315547853708267
|
| 2018 |
+
}
|
| 2019 |
+
}
|
| 2020 |
+
}
|
evaluation_results/eval_20250208_161149/plots/calibration_0.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_1.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_2.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_3.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_4.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_5.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/calibration_6.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/class_calibration.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250208_161149/plots/language_performance.png
ADDED
|
evaluation_results/eval_20250208_161149/plots/metric_correlations.png
ADDED
|
evaluation_results/eval_20250208_161149/plots/overall_calibration.png
ADDED
|
evaluation_results/eval_20250208_161149/plots/performance_distributions.png
ADDED
|
evaluation_results/eval_20250208_161149/predictions.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d562e6c02fc268d01464f9716846556a75e863ec9cc03d582f39e14191cbd496
|
| 3 |
+
size 809713
|
evaluation_results/eval_20250208_161149/thresholds.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": {
|
| 3 |
+
"toxic": 0.46047261357307434,
|
| 4 |
+
"severe_toxic": 0.03537772223353386,
|
| 5 |
+
"obscene": 0.2777131497859955,
|
| 6 |
+
"threat": 0.016539234668016434,
|
| 7 |
+
"insult": 0.25907590985298157,
|
| 8 |
+
"identity_hate": 0.026042653247714043
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"toxic": 0.44148319959640503,
|
| 12 |
+
"severe_toxic": 0.03648429363965988,
|
| 13 |
+
"obscene": 0.1990610957145691,
|
| 14 |
+
"threat": 0.012619060464203358,
|
| 15 |
+
"insult": 0.24214455485343933,
|
| 16 |
+
"identity_hate": 0.03167847916483879
|
| 17 |
+
},
|
| 18 |
+
"2": {
|
| 19 |
+
"toxic": 0.3978160321712494,
|
| 20 |
+
"severe_toxic": 0.015000982210040092,
|
| 21 |
+
"obscene": 0.11362762749195099,
|
| 22 |
+
"threat": 0.008195769973099232,
|
| 23 |
+
"insult": 0.1587354838848114,
|
| 24 |
+
"identity_hate": 0.0467526838183403
|
| 25 |
+
},
|
| 26 |
+
"3": {
|
| 27 |
+
"toxic": 0.5033379793167114,
|
| 28 |
+
"severe_toxic": 0.021415209397673607,
|
| 29 |
+
"obscene": 0.14896434545516968,
|
| 30 |
+
"threat": 0.013273251242935658,
|
| 31 |
+
"insult": 0.22368550300598145,
|
| 32 |
+
"identity_hate": 0.042373284697532654
|
| 33 |
+
},
|
| 34 |
+
"4": {
|
| 35 |
+
"toxic": 0.4544762372970581,
|
| 36 |
+
"severe_toxic": 0.0307308342307806,
|
| 37 |
+
"obscene": 0.1775909662246704,
|
| 38 |
+
"threat": 0.014509523287415504,
|
| 39 |
+
"insult": 0.19917058944702148,
|
| 40 |
+
"identity_hate": 0.031982019543647766
|
| 41 |
+
},
|
| 42 |
+
"5": {
|
| 43 |
+
"toxic": 0.5313886404037476,
|
| 44 |
+
"severe_toxic": 0.05001964047551155,
|
| 45 |
+
"obscene": 0.20134443044662476,
|
| 46 |
+
"threat": 0.018557138741016388,
|
| 47 |
+
"insult": 0.32249945402145386,
|
| 48 |
+
"identity_hate": 0.055891502648591995
|
| 49 |
+
},
|
| 50 |
+
"6": {
|
| 51 |
+
"toxic": 0.5710838437080383,
|
| 52 |
+
"severe_toxic": 0.023807251825928688,
|
| 53 |
+
"obscene": 0.14760328829288483,
|
| 54 |
+
"threat": 0.026773449033498764,
|
| 55 |
+
"insult": 0.2600024938583374,
|
| 56 |
+
"identity_hate": 0.03315547853708267
|
| 57 |
+
}
|
| 58 |
+
}
|
evaluation_results/eval_20250401_143401/eval_params.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20250401_143401",
|
| 3 |
+
"model_path": "weights/toxic_classifier_xlm-roberta-large",
|
| 4 |
+
"checkpoint": null,
|
| 5 |
+
"test_file": "dataset/split/val.csv",
|
| 6 |
+
"batch_size": 64,
|
| 7 |
+
"num_workers": 16,
|
| 8 |
+
"cache_dir": "cached_data",
|
| 9 |
+
"force_retokenize": false,
|
| 10 |
+
"prefetch_factor": 2,
|
| 11 |
+
"max_length": 128,
|
| 12 |
+
"gc_frequency": 500,
|
| 13 |
+
"label_columns": [
|
| 14 |
+
"toxic",
|
| 15 |
+
"severe_toxic",
|
| 16 |
+
"obscene",
|
| 17 |
+
"threat",
|
| 18 |
+
"insult",
|
| 19 |
+
"identity_hate"
|
| 20 |
+
]
|
| 21 |
+
}
|
evaluation_results/eval_20250401_143401/evaluation_results.json
ADDED
|
@@ -0,0 +1,684 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"default_thresholds": {
|
| 3 |
+
"overall": {
|
| 4 |
+
"auc_macro": 0.9116120481007194,
|
| 5 |
+
"auc_weighted": 0.9305869103434485,
|
| 6 |
+
"precision_macro": 0.7017348731216243,
|
| 7 |
+
"precision_weighted": 0.7941268867549155,
|
| 8 |
+
"recall_macro": 0.4685972374699909,
|
| 9 |
+
"recall_weighted": 0.7276981501898812,
|
| 10 |
+
"f1_macro": 0.5228946160541719,
|
| 11 |
+
"f1_weighted": 0.7469638283202927,
|
| 12 |
+
"hamming_loss": 0.08497391889618038,
|
| 13 |
+
"exact_match": 0.6461383139828369
|
| 14 |
+
},
|
| 15 |
+
"per_language": {
|
| 16 |
+
"0": {
|
| 17 |
+
"auc_macro": 0.9445681226397739,
|
| 18 |
+
"auc_weighted": 0.9465404082666297,
|
| 19 |
+
"precision_macro": 0.7219326082283263,
|
| 20 |
+
"precision_weighted": 0.7908382685179838,
|
| 21 |
+
"recall_macro": 0.5535398284592582,
|
| 22 |
+
"recall_weighted": 0.7833787465940054,
|
| 23 |
+
"f1_macro": 0.6000668677340134,
|
| 24 |
+
"f1_weighted": 0.7786737821480415,
|
| 25 |
+
"hamming_loss": 0.07650567773465575,
|
| 26 |
+
"exact_match": 0.6601983613626563,
|
| 27 |
+
"sample_count": 4638
|
| 28 |
+
},
|
| 29 |
+
"1": {
|
| 30 |
+
"auc_macro": 0.9064189306891727,
|
| 31 |
+
"auc_weighted": 0.9274078123911156,
|
| 32 |
+
"precision_macro": 0.6864158919056594,
|
| 33 |
+
"precision_weighted": 0.7852581089086744,
|
| 34 |
+
"recall_macro": 0.44366116589032245,
|
| 35 |
+
"recall_weighted": 0.7238780977896851,
|
| 36 |
+
"f1_macro": 0.48488161881757197,
|
| 37 |
+
"f1_weighted": 0.737051270947713,
|
| 38 |
+
"hamming_loss": 0.08752166377816291,
|
| 39 |
+
"exact_match": 0.6402849990371654,
|
| 40 |
+
"sample_count": 5193
|
| 41 |
+
},
|
| 42 |
+
"2": {
|
| 43 |
+
"auc_macro": 0.8945135400492461,
|
| 44 |
+
"auc_weighted": 0.9120120071881025,
|
| 45 |
+
"precision_macro": 0.7178271955012184,
|
| 46 |
+
"precision_weighted": 0.7982113173628885,
|
| 47 |
+
"recall_macro": 0.4043111379749362,
|
| 48 |
+
"recall_weighted": 0.6535947712418301,
|
| 49 |
+
"f1_macro": 0.4738257066120983,
|
| 50 |
+
"f1_weighted": 0.7027905834489889,
|
| 51 |
+
"hamming_loss": 0.09504905757810483,
|
| 52 |
+
"exact_match": 0.6229666924864447,
|
| 53 |
+
"sample_count": 5164
|
| 54 |
+
},
|
| 55 |
+
"3": {
|
| 56 |
+
"auc_macro": 0.9135727964673032,
|
| 57 |
+
"auc_weighted": 0.9339502655719858,
|
| 58 |
+
"precision_macro": 0.7093511783545062,
|
| 59 |
+
"precision_weighted": 0.7989932896421867,
|
| 60 |
+
"recall_macro": 0.4814045378504133,
|
| 61 |
+
"recall_weighted": 0.7405478070912451,
|
| 62 |
+
"f1_macro": 0.5327086132158053,
|
| 63 |
+
"f1_weighted": 0.7545000455696493,
|
| 64 |
+
"hamming_loss": 0.08359133126934984,
|
| 65 |
+
"exact_match": 0.6480263157894737,
|
| 66 |
+
"sample_count": 5168
|
| 67 |
+
},
|
| 68 |
+
"4": {
|
| 69 |
+
"auc_macro": 0.9050160058685811,
|
| 70 |
+
"auc_weighted": 0.9286663336151794,
|
| 71 |
+
"precision_macro": 0.6819384343494851,
|
| 72 |
+
"precision_weighted": 0.7945304496145832,
|
| 73 |
+
"recall_macro": 0.4656370270227365,
|
| 74 |
+
"recall_weighted": 0.7256427604871448,
|
| 75 |
+
"f1_macro": 0.5189060171591118,
|
| 76 |
+
"f1_weighted": 0.7474398480273773,
|
| 77 |
+
"hamming_loss": 0.08477150798267727,
|
| 78 |
+
"exact_match": 0.6509598603839442,
|
| 79 |
+
"sample_count": 5157
|
| 80 |
+
},
|
| 81 |
+
"5": {
|
| 82 |
+
"auc_macro": 0.9115535221829411,
|
| 83 |
+
"auc_weighted": 0.9337271942250184,
|
| 84 |
+
"precision_macro": 0.6927437323462047,
|
| 85 |
+
"precision_weighted": 0.7984424245250574,
|
| 86 |
+
"recall_macro": 0.4695924180409275,
|
| 87 |
+
"recall_weighted": 0.739629005059022,
|
| 88 |
+
"f1_macro": 0.5191221600663896,
|
| 89 |
+
"f1_weighted": 0.7554966948679994,
|
| 90 |
+
"hamming_loss": 0.08252364295893251,
|
| 91 |
+
"exact_match": 0.6525456665371162,
|
| 92 |
+
"sample_count": 5146
|
| 93 |
+
},
|
| 94 |
+
"6": {
|
| 95 |
+
"auc_macro": 0.9045493247421005,
|
| 96 |
+
"auc_weighted": 0.9308415576648513,
|
| 97 |
+
"precision_macro": 0.6958021612757893,
|
| 98 |
+
"precision_weighted": 0.7925797967619269,
|
| 99 |
+
"recall_macro": 0.4680867128534896,
|
| 100 |
+
"recall_weighted": 0.735071488645921,
|
| 101 |
+
"f1_macro": 0.5184729138243417,
|
| 102 |
+
"f1_weighted": 0.7510735996739993,
|
| 103 |
+
"hamming_loss": 0.0839753466872111,
|
| 104 |
+
"exact_match": 0.6494607087827426,
|
| 105 |
+
"sample_count": 5192
|
| 106 |
+
}
|
| 107 |
+
},
|
| 108 |
+
"per_class": {
|
| 109 |
+
"toxic": {
|
| 110 |
+
"auc": 0.9619106577495796,
|
| 111 |
+
"threshold": 0.5,
|
| 112 |
+
"precision": 0.9067127628925382,
|
| 113 |
+
"recall": 0.8891902582358592,
|
| 114 |
+
"f1": 0.8978660276161132,
|
| 115 |
+
"support": 17697,
|
| 116 |
+
"brier": 0.09342169378057544,
|
| 117 |
+
"true_positives": 15736,
|
| 118 |
+
"false_positives": 1619,
|
| 119 |
+
"true_negatives": 16342,
|
| 120 |
+
"false_negatives": 1961
|
| 121 |
+
},
|
| 122 |
+
"severe_toxic": {
|
| 123 |
+
"auc": 0.9017555053121755,
|
| 124 |
+
"threshold": 0.5,
|
| 125 |
+
"precision": 0.5620915032679739,
|
| 126 |
+
"recall": 0.15589123867069488,
|
| 127 |
+
"f1": 0.24408703878902555,
|
| 128 |
+
"support": 1655,
|
| 129 |
+
"brier": 0.05564494143865772,
|
| 130 |
+
"true_positives": 258,
|
| 131 |
+
"false_positives": 201,
|
| 132 |
+
"true_negatives": 33802,
|
| 133 |
+
"false_negatives": 1397
|
| 134 |
+
},
|
| 135 |
+
"obscene": {
|
| 136 |
+
"auc": 0.9247491461802884,
|
| 137 |
+
"threshold": 0.5,
|
| 138 |
+
"precision": 0.7636434008515031,
|
| 139 |
+
"recall": 0.686181312311616,
|
| 140 |
+
"f1": 0.7228430115405752,
|
| 141 |
+
"support": 8626,
|
| 142 |
+
"brier": 0.1102165916686836,
|
| 143 |
+
"true_positives": 5919,
|
| 144 |
+
"false_positives": 1832,
|
| 145 |
+
"true_negatives": 25200,
|
| 146 |
+
"false_negatives": 2707
|
| 147 |
+
},
|
| 148 |
+
"threat": {
|
| 149 |
+
"auc": 0.8978719938708597,
|
| 150 |
+
"threshold": 0.5,
|
| 151 |
+
"precision": 0.6042553191489362,
|
| 152 |
+
"recall": 0.1868421052631579,
|
| 153 |
+
"f1": 0.28542713567839195,
|
| 154 |
+
"support": 760,
|
| 155 |
+
"brier": 0.03694216309848939,
|
| 156 |
+
"true_positives": 142,
|
| 157 |
+
"false_positives": 93,
|
| 158 |
+
"true_negatives": 34805,
|
| 159 |
+
"false_negatives": 618
|
| 160 |
+
},
|
| 161 |
+
"insult": {
|
| 162 |
+
"auc": 0.8962985964590791,
|
| 163 |
+
"threshold": 0.5,
|
| 164 |
+
"precision": 0.6981960484871623,
|
| 165 |
+
"recall": 0.7172271791352093,
|
| 166 |
+
"f1": 0.7075836718901142,
|
| 167 |
+
"support": 10199,
|
| 168 |
+
"brier": 0.1366709113756841,
|
| 169 |
+
"true_positives": 7315,
|
| 170 |
+
"false_positives": 3162,
|
| 171 |
+
"true_negatives": 22297,
|
| 172 |
+
"false_negatives": 2884
|
| 173 |
+
},
|
| 174 |
+
"identity_hate": {
|
| 175 |
+
"auc": 0.887086389032334,
|
| 176 |
+
"threshold": 0.5,
|
| 177 |
+
"precision": 0.6755102040816326,
|
| 178 |
+
"recall": 0.17625133120340788,
|
| 179 |
+
"f1": 0.2795608108108108,
|
| 180 |
+
"support": 1878,
|
| 181 |
+
"brier": 0.06076370760519854,
|
| 182 |
+
"true_positives": 331,
|
| 183 |
+
"false_positives": 159,
|
| 184 |
+
"true_negatives": 33621,
|
| 185 |
+
"false_negatives": 1547
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
},
|
| 189 |
+
"optimized_thresholds": {
|
| 190 |
+
"overall": {
|
| 191 |
+
"auc_macro": 0.9116120481007194,
|
| 192 |
+
"auc_weighted": 0.9305869103434485,
|
| 193 |
+
"precision_macro": 0.5775888380947196,
|
| 194 |
+
"precision_weighted": 0.7443465124836487,
|
| 195 |
+
"recall_macro": 0.639900823721825,
|
| 196 |
+
"recall_weighted": 0.798186941075585,
|
| 197 |
+
"f1_macro": 0.6040131510667749,
|
| 198 |
+
"f1_weighted": 0.7686775463209056,
|
| 199 |
+
"hamming_loss": 0.09459775272496121,
|
| 200 |
+
"exact_match": 0.6191317516405855
|
| 201 |
+
},
|
| 202 |
+
"per_language": {
|
| 203 |
+
"0": {
|
| 204 |
+
"auc_macro": 0.9445681226397739,
|
| 205 |
+
"auc_weighted": 0.9465404082666297,
|
| 206 |
+
"precision_macro": 0.5885969911405202,
|
| 207 |
+
"precision_weighted": 0.7416734521846035,
|
| 208 |
+
"recall_macro": 0.7381385425477333,
|
| 209 |
+
"recall_weighted": 0.8514986376021798,
|
| 210 |
+
"f1_macro": 0.6497623010487168,
|
| 211 |
+
"f1_weighted": 0.7903759805291908,
|
| 212 |
+
"hamming_loss": 0.08746586172200661,
|
| 213 |
+
"exact_match": 0.6282880551962052,
|
| 214 |
+
"sample_count": 4638
|
| 215 |
+
},
|
| 216 |
+
"1": {
|
| 217 |
+
"auc_macro": 0.9064189306891727,
|
| 218 |
+
"auc_weighted": 0.9274078123911156,
|
| 219 |
+
"precision_macro": 0.5769491938694048,
|
| 220 |
+
"precision_weighted": 0.7372462490399235,
|
| 221 |
+
"recall_macro": 0.6223651765807731,
|
| 222 |
+
"recall_weighted": 0.7957133288680509,
|
| 223 |
+
"f1_macro": 0.5940383621467368,
|
| 224 |
+
"f1_weighted": 0.7630519259035966,
|
| 225 |
+
"hamming_loss": 0.09734257654534952,
|
| 226 |
+
"exact_match": 0.6112073945696129,
|
| 227 |
+
"sample_count": 5193
|
| 228 |
+
},
|
| 229 |
+
"2": {
|
| 230 |
+
"auc_macro": 0.8945135400492461,
|
| 231 |
+
"auc_weighted": 0.9120120071881025,
|
| 232 |
+
"precision_macro": 0.5883546567568967,
|
| 233 |
+
"precision_weighted": 0.7471472711374241,
|
| 234 |
+
"recall_macro": 0.5741089328356292,
|
| 235 |
+
"recall_weighted": 0.7323613205966147,
|
| 236 |
+
"f1_macro": 0.579910490554519,
|
| 237 |
+
"f1_weighted": 0.7393192722268676,
|
| 238 |
+
"hamming_loss": 0.10030983733539892,
|
| 239 |
+
"exact_match": 0.6094113090627421,
|
| 240 |
+
"sample_count": 5164
|
| 241 |
+
},
|
| 242 |
+
"3": {
|
| 243 |
+
"auc_macro": 0.9135727964673032,
|
| 244 |
+
"auc_weighted": 0.9339502655719858,
|
| 245 |
+
"precision_macro": 0.5674300764951785,
|
| 246 |
+
"precision_weighted": 0.7452385794349706,
|
| 247 |
+
"recall_macro": 0.6585754182827804,
|
| 248 |
+
"recall_weighted": 0.8117963367501261,
|
| 249 |
+
"f1_macro": 0.6075512335059755,
|
| 250 |
+
"f1_weighted": 0.7751847838928642,
|
| 251 |
+
"hamming_loss": 0.09404024767801858,
|
| 252 |
+
"exact_match": 0.6234520123839009,
|
| 253 |
+
"sample_count": 5168
|
| 254 |
+
},
|
| 255 |
+
"4": {
|
| 256 |
+
"auc_macro": 0.9050160058685811,
|
| 257 |
+
"auc_weighted": 0.9286663336151794,
|
| 258 |
+
"precision_macro": 0.5635774868138544,
|
| 259 |
+
"precision_weighted": 0.7453012013072762,
|
| 260 |
+
"recall_macro": 0.6307198572670079,
|
| 261 |
+
"recall_weighted": 0.793640054127199,
|
| 262 |
+
"f1_macro": 0.5906173214394316,
|
| 263 |
+
"f1_weighted": 0.7663604150980545,
|
| 264 |
+
"hamming_loss": 0.0963415422403206,
|
| 265 |
+
"exact_match": 0.6162497576110142,
|
| 266 |
+
"sample_count": 5157
|
| 267 |
+
},
|
| 268 |
+
"5": {
|
| 269 |
+
"auc_macro": 0.9115535221829411,
|
| 270 |
+
"auc_weighted": 0.9337271942250184,
|
| 271 |
+
"precision_macro": 0.577007586897046,
|
| 272 |
+
"precision_weighted": 0.7468873881119108,
|
| 273 |
+
"recall_macro": 0.635638229939968,
|
| 274 |
+
"recall_weighted": 0.8080944350758853,
|
| 275 |
+
"f1_macro": 0.5988862551226474,
|
| 276 |
+
"f1_weighted": 0.7742215916662522,
|
| 277 |
+
"hamming_loss": 0.09350304443580774,
|
| 278 |
+
"exact_match": 0.6195102992615624,
|
| 279 |
+
"sample_count": 5146
|
| 280 |
+
},
|
| 281 |
+
"6": {
|
| 282 |
+
"auc_macro": 0.9045493247421005,
|
| 283 |
+
"auc_weighted": 0.9308415576648513,
|
| 284 |
+
"precision_macro": 0.591572349044604,
|
| 285 |
+
"precision_weighted": 0.749047954356656,
|
| 286 |
+
"recall_macro": 0.6294384348455582,
|
| 287 |
+
"recall_weighted": 0.8016820857863751,
|
| 288 |
+
"f1_macro": 0.6039252504591597,
|
| 289 |
+
"f1_weighted": 0.772582192067038,
|
| 290 |
+
"hamming_loss": 0.09244992295839753,
|
| 291 |
+
"exact_match": 0.6267334360554699,
|
| 292 |
+
"sample_count": 5192
|
| 293 |
+
}
|
| 294 |
+
},
|
| 295 |
+
"per_class": {
|
| 296 |
+
"toxic": {
|
| 297 |
+
"auc": 0.9619106577495796,
|
| 298 |
+
"threshold": 0.4877551020408163,
|
| 299 |
+
"precision": 0.8999716472923164,
|
| 300 |
+
"recall": 0.8968186698310449,
|
| 301 |
+
"f1": 0.8983923921657421,
|
| 302 |
+
"support": 17697,
|
| 303 |
+
"brier": 0.09342169378057544,
|
| 304 |
+
"true_positives": 15871,
|
| 305 |
+
"false_positives": 1764,
|
| 306 |
+
"true_negatives": 16197,
|
| 307 |
+
"false_negatives": 1826
|
| 308 |
+
},
|
| 309 |
+
"severe_toxic": {
|
| 310 |
+
"auc": 0.9017555053121755,
|
| 311 |
+
"threshold": 0.373469387755102,
|
| 312 |
+
"precision": 0.34626149540183926,
|
| 313 |
+
"recall": 0.5232628398791541,
|
| 314 |
+
"f1": 0.4167468719923003,
|
| 315 |
+
"support": 1655,
|
| 316 |
+
"brier": 0.05564494143865772,
|
| 317 |
+
"true_positives": 866,
|
| 318 |
+
"false_positives": 1635,
|
| 319 |
+
"true_negatives": 32368,
|
| 320 |
+
"false_negatives": 789
|
| 321 |
+
},
|
| 322 |
+
"obscene": {
|
| 323 |
+
"auc": 0.9247491461802884,
|
| 324 |
+
"threshold": 0.4551020408163265,
|
| 325 |
+
"precision": 0.7017099430018999,
|
| 326 |
+
"recall": 0.770693252956179,
|
| 327 |
+
"f1": 0.734585635359116,
|
| 328 |
+
"support": 8626,
|
| 329 |
+
"brier": 0.1102165916686836,
|
| 330 |
+
"true_positives": 6648,
|
| 331 |
+
"false_positives": 2826,
|
| 332 |
+
"true_negatives": 24206,
|
| 333 |
+
"false_negatives": 1978
|
| 334 |
+
},
|
| 335 |
+
"threat": {
|
| 336 |
+
"auc": 0.8978719938708597,
|
| 337 |
+
"threshold": 0.38979591836734695,
|
| 338 |
+
"precision": 0.43684992570579495,
|
| 339 |
+
"recall": 0.3868421052631579,
|
| 340 |
+
"f1": 0.41032798325191905,
|
| 341 |
+
"support": 760,
|
| 342 |
+
"brier": 0.03694216309848939,
|
| 343 |
+
"true_positives": 294,
|
| 344 |
+
"false_positives": 379,
|
| 345 |
+
"true_negatives": 34519,
|
| 346 |
+
"false_negatives": 466
|
| 347 |
+
},
|
| 348 |
+
"insult": {
|
| 349 |
+
"auc": 0.8962985964590791,
|
| 350 |
+
"threshold": 0.463265306122449,
|
| 351 |
+
"precision": 0.6568989575638184,
|
| 352 |
+
"recall": 0.7846847730169625,
|
| 353 |
+
"f1": 0.7151282280403896,
|
| 354 |
+
"support": 10199,
|
| 355 |
+
"brier": 0.1366709113756841,
|
| 356 |
+
"true_positives": 8003,
|
| 357 |
+
"false_positives": 4180,
|
| 358 |
+
"true_negatives": 21279,
|
| 359 |
+
"false_negatives": 2196
|
| 360 |
+
},
|
| 361 |
+
"identity_hate": {
|
| 362 |
+
"auc": 0.887086389032334,
|
| 363 |
+
"threshold": 0.373469387755102,
|
| 364 |
+
"precision": 0.423841059602649,
|
| 365 |
+
"recall": 0.47710330138445156,
|
| 366 |
+
"f1": 0.44889779559118237,
|
| 367 |
+
"support": 1878,
|
| 368 |
+
"brier": 0.06076370760519854,
|
| 369 |
+
"true_positives": 896,
|
| 370 |
+
"false_positives": 1218,
|
| 371 |
+
"true_negatives": 32562,
|
| 372 |
+
"false_negatives": 982
|
| 373 |
+
}
|
| 374 |
+
}
|
| 375 |
+
},
|
| 376 |
+
"thresholds": {
|
| 377 |
+
"global": {
|
| 378 |
+
"toxic": {
|
| 379 |
+
"threshold": 0.4877551020408163,
|
| 380 |
+
"f1_score": 0.8926184748925591,
|
| 381 |
+
"support": 17697,
|
| 382 |
+
"total_samples": 35658
|
| 383 |
+
},
|
| 384 |
+
"severe_toxic": {
|
| 385 |
+
"threshold": 0.373469387755102,
|
| 386 |
+
"f1_score": 0.41132469871513055,
|
| 387 |
+
"support": 1655,
|
| 388 |
+
"total_samples": 35658
|
| 389 |
+
},
|
| 390 |
+
"obscene": {
|
| 391 |
+
"threshold": 0.4551020408163265,
|
| 392 |
+
"f1_score": 0.726924984126118,
|
| 393 |
+
"support": 8626,
|
| 394 |
+
"total_samples": 35658
|
| 395 |
+
},
|
| 396 |
+
"threat": {
|
| 397 |
+
"threshold": 0.38979591836734695,
|
| 398 |
+
"f1_score": 0.41018044345470683,
|
| 399 |
+
"support": 760,
|
| 400 |
+
"total_samples": 35658
|
| 401 |
+
},
|
| 402 |
+
"insult": {
|
| 403 |
+
"threshold": 0.463265306122449,
|
| 404 |
+
"f1_score": 0.7104171976414078,
|
| 405 |
+
"support": 10199,
|
| 406 |
+
"total_samples": 35658
|
| 407 |
+
},
|
| 408 |
+
"identity_hate": {
|
| 409 |
+
"threshold": 0.373469387755102,
|
| 410 |
+
"f1_score": 0.4444212159518569,
|
| 411 |
+
"support": 1878,
|
| 412 |
+
"total_samples": 35658
|
| 413 |
+
}
|
| 414 |
+
},
|
| 415 |
+
"per_language": {
|
| 416 |
+
"0": {
|
| 417 |
+
"toxic": {
|
| 418 |
+
"threshold": 0.4379310344827586,
|
| 419 |
+
"f1_score": 0.6362062357467935,
|
| 420 |
+
"support": 2228,
|
| 421 |
+
"total_samples": 4638
|
| 422 |
+
},
|
| 423 |
+
"severe_toxic": {
|
| 424 |
+
"threshold": 0.4241379310344827,
|
| 425 |
+
"f1_score": 0.6836346572759443,
|
| 426 |
+
"support": 199,
|
| 427 |
+
"total_samples": 4638
|
| 428 |
+
},
|
| 429 |
+
"obscene": {
|
| 430 |
+
"threshold": 0.4655172413793103,
|
| 431 |
+
"f1_score": 0.4812423489705398,
|
| 432 |
+
"support": 1235,
|
| 433 |
+
"total_samples": 4638
|
| 434 |
+
},
|
| 435 |
+
"threat": {
|
| 436 |
+
"threshold": 0.4655172413793103,
|
| 437 |
+
"f1_score": 0.560716193430073,
|
| 438 |
+
"support": 118,
|
| 439 |
+
"total_samples": 4638
|
| 440 |
+
},
|
| 441 |
+
"insult": {
|
| 442 |
+
"threshold": 0.6586206896551723,
|
| 443 |
+
"f1_score": 0.6797683196093679,
|
| 444 |
+
"support": 1144,
|
| 445 |
+
"total_samples": 4638
|
| 446 |
+
},
|
| 447 |
+
"identity_hate": {
|
| 448 |
+
"threshold": 0.6310344827586206,
|
| 449 |
+
"f1_score": 0.4653856089660791,
|
| 450 |
+
"support": 214,
|
| 451 |
+
"total_samples": 4638
|
| 452 |
+
}
|
| 453 |
+
},
|
| 454 |
+
"1": {
|
| 455 |
+
"toxic": {
|
| 456 |
+
"threshold": 0.38275862068965516,
|
| 457 |
+
"f1_score": 0.5653885349662379,
|
| 458 |
+
"support": 2589,
|
| 459 |
+
"total_samples": 5193
|
| 460 |
+
},
|
| 461 |
+
"severe_toxic": {
|
| 462 |
+
"threshold": 0.36896551724137927,
|
| 463 |
+
"f1_score": 0.6303988062940857,
|
| 464 |
+
"support": 245,
|
| 465 |
+
"total_samples": 5193
|
| 466 |
+
},
|
| 467 |
+
"obscene": {
|
| 468 |
+
"threshold": 0.6724137931034482,
|
| 469 |
+
"f1_score": 0.69776888519452,
|
| 470 |
+
"support": 1239,
|
| 471 |
+
"total_samples": 5193
|
| 472 |
+
},
|
| 473 |
+
"threat": {
|
| 474 |
+
"threshold": 0.5482758620689655,
|
| 475 |
+
"f1_score": 0.49444444444444446,
|
| 476 |
+
"support": 106,
|
| 477 |
+
"total_samples": 5193
|
| 478 |
+
},
|
| 479 |
+
"insult": {
|
| 480 |
+
"threshold": 0.45172413793103444,
|
| 481 |
+
"f1_score": 0.43592427815977264,
|
| 482 |
+
"support": 1514,
|
| 483 |
+
"total_samples": 5193
|
| 484 |
+
},
|
| 485 |
+
"identity_hate": {
|
| 486 |
+
"threshold": 0.603448275862069,
|
| 487 |
+
"f1_score": 0.437278850182076,
|
| 488 |
+
"support": 279,
|
| 489 |
+
"total_samples": 5193
|
| 490 |
+
}
|
| 491 |
+
},
|
| 492 |
+
"2": {
|
| 493 |
+
"toxic": {
|
| 494 |
+
"threshold": 0.36896551724137927,
|
| 495 |
+
"f1_score": 0.5636259188109024,
|
| 496 |
+
"support": 2585,
|
| 497 |
+
"total_samples": 5164
|
| 498 |
+
},
|
| 499 |
+
"severe_toxic": {
|
| 500 |
+
"threshold": 0.396551724137931,
|
| 501 |
+
"f1_score": 0.6242565552619788,
|
| 502 |
+
"support": 243,
|
| 503 |
+
"total_samples": 5164
|
| 504 |
+
},
|
| 505 |
+
"obscene": {
|
| 506 |
+
"threshold": 0.6310344827586206,
|
| 507 |
+
"f1_score": 0.609064783177638,
|
| 508 |
+
"support": 1233,
|
| 509 |
+
"total_samples": 5164
|
| 510 |
+
},
|
| 511 |
+
"threat": {
|
| 512 |
+
"threshold": 0.6862068965517241,
|
| 513 |
+
"f1_score": 0.4331632653061225,
|
| 514 |
+
"support": 110,
|
| 515 |
+
"total_samples": 5164
|
| 516 |
+
},
|
| 517 |
+
"insult": {
|
| 518 |
+
"threshold": 0.6586206896551723,
|
| 519 |
+
"f1_score": 0.5919194590653671,
|
| 520 |
+
"support": 1514,
|
| 521 |
+
"total_samples": 5164
|
| 522 |
+
},
|
| 523 |
+
"identity_hate": {
|
| 524 |
+
"threshold": 0.5896551724137931,
|
| 525 |
+
"f1_score": 0.44181963497241983,
|
| 526 |
+
"support": 282,
|
| 527 |
+
"total_samples": 5164
|
| 528 |
+
}
|
| 529 |
+
},
|
| 530 |
+
"3": {
|
| 531 |
+
"toxic": {
|
| 532 |
+
"threshold": 0.35517241379310344,
|
| 533 |
+
"f1_score": 0.5733103161693534,
|
| 534 |
+
"support": 2579,
|
| 535 |
+
"total_samples": 5168
|
| 536 |
+
},
|
| 537 |
+
"severe_toxic": {
|
| 538 |
+
"threshold": 0.38275862068965516,
|
| 539 |
+
"f1_score": 0.6597492750378473,
|
| 540 |
+
"support": 243,
|
| 541 |
+
"total_samples": 5168
|
| 542 |
+
},
|
| 543 |
+
"obscene": {
|
| 544 |
+
"threshold": 0.5896551724137931,
|
| 545 |
+
"f1_score": 0.5803338639295222,
|
| 546 |
+
"support": 1234,
|
| 547 |
+
"total_samples": 5168
|
| 548 |
+
},
|
| 549 |
+
"threat": {
|
| 550 |
+
"threshold": 0.5896551724137931,
|
| 551 |
+
"f1_score": 0.5531975271105706,
|
| 552 |
+
"support": 108,
|
| 553 |
+
"total_samples": 5168
|
| 554 |
+
},
|
| 555 |
+
"insult": {
|
| 556 |
+
"threshold": 0.4103448275862069,
|
| 557 |
+
"f1_score": 0.43932768516388326,
|
| 558 |
+
"support": 1511,
|
| 559 |
+
"total_samples": 5168
|
| 560 |
+
},
|
| 561 |
+
"identity_hate": {
|
| 562 |
+
"threshold": 0.5482758620689655,
|
| 563 |
+
"f1_score": 0.5223443223443224,
|
| 564 |
+
"support": 276,
|
| 565 |
+
"total_samples": 5168
|
| 566 |
+
}
|
| 567 |
+
},
|
| 568 |
+
"4": {
|
| 569 |
+
"toxic": {
|
| 570 |
+
"threshold": 0.36896551724137927,
|
| 571 |
+
"f1_score": 0.5671790360963849,
|
| 572 |
+
"support": 2568,
|
| 573 |
+
"total_samples": 5157
|
| 574 |
+
},
|
| 575 |
+
"severe_toxic": {
|
| 576 |
+
"threshold": 0.4241379310344827,
|
| 577 |
+
"f1_score": 0.6449236298292902,
|
| 578 |
+
"support": 240,
|
| 579 |
+
"total_samples": 5157
|
| 580 |
+
},
|
| 581 |
+
"obscene": {
|
| 582 |
+
"threshold": 0.5896551724137931,
|
| 583 |
+
"f1_score": 0.5763915317957939,
|
| 584 |
+
"support": 1225,
|
| 585 |
+
"total_samples": 5157
|
| 586 |
+
},
|
| 587 |
+
"threat": {
|
| 588 |
+
"threshold": 0.5482758620689655,
|
| 589 |
+
"f1_score": 0.5202898550724637,
|
| 590 |
+
"support": 105,
|
| 591 |
+
"total_samples": 5157
|
| 592 |
+
},
|
| 593 |
+
"insult": {
|
| 594 |
+
"threshold": 0.45172413793103444,
|
| 595 |
+
"f1_score": 0.44168323420099964,
|
| 596 |
+
"support": 1501,
|
| 597 |
+
"total_samples": 5157
|
| 598 |
+
},
|
| 599 |
+
"identity_hate": {
|
| 600 |
+
"threshold": 0.5344827586206896,
|
| 601 |
+
"f1_score": 0.3050612442147916,
|
| 602 |
+
"support": 273,
|
| 603 |
+
"total_samples": 5157
|
| 604 |
+
}
|
| 605 |
+
},
|
| 606 |
+
"5": {
|
| 607 |
+
"toxic": {
|
| 608 |
+
"threshold": 0.38275862068965516,
|
| 609 |
+
"f1_score": 0.5689208863252881,
|
| 610 |
+
"support": 2572,
|
| 611 |
+
"total_samples": 5146
|
| 612 |
+
},
|
| 613 |
+
"severe_toxic": {
|
| 614 |
+
"threshold": 0.38275862068965516,
|
| 615 |
+
"f1_score": 0.6483406115143644,
|
| 616 |
+
"support": 242,
|
| 617 |
+
"total_samples": 5146
|
| 618 |
+
},
|
| 619 |
+
"obscene": {
|
| 620 |
+
"threshold": 0.6172413793103448,
|
| 621 |
+
"f1_score": 0.7591744574190955,
|
| 622 |
+
"support": 1227,
|
| 623 |
+
"total_samples": 5146
|
| 624 |
+
},
|
| 625 |
+
"threat": {
|
| 626 |
+
"threshold": 0.5896551724137931,
|
| 627 |
+
"f1_score": 0.48909813468905516,
|
| 628 |
+
"support": 106,
|
| 629 |
+
"total_samples": 5146
|
| 630 |
+
},
|
| 631 |
+
"insult": {
|
| 632 |
+
"threshold": 0.4655172413793103,
|
| 633 |
+
"f1_score": 0.4438765689644482,
|
| 634 |
+
"support": 1506,
|
| 635 |
+
"total_samples": 5146
|
| 636 |
+
},
|
| 637 |
+
"identity_hate": {
|
| 638 |
+
"threshold": 0.4655172413793103,
|
| 639 |
+
"f1_score": 0.57592394533571,
|
| 640 |
+
"support": 277,
|
| 641 |
+
"total_samples": 5146
|
| 642 |
+
}
|
| 643 |
+
},
|
| 644 |
+
"6": {
|
| 645 |
+
"toxic": {
|
| 646 |
+
"threshold": 0.396551724137931,
|
| 647 |
+
"f1_score": 0.5707684299142913,
|
| 648 |
+
"support": 2576,
|
| 649 |
+
"total_samples": 5192
|
| 650 |
+
},
|
| 651 |
+
"severe_toxic": {
|
| 652 |
+
"threshold": 0.38275862068965516,
|
| 653 |
+
"f1_score": 0.6300280234278585,
|
| 654 |
+
"support": 243,
|
| 655 |
+
"total_samples": 5192
|
| 656 |
+
},
|
| 657 |
+
"obscene": {
|
| 658 |
+
"threshold": 0.603448275862069,
|
| 659 |
+
"f1_score": 0.5508854395728676,
|
| 660 |
+
"support": 1233,
|
| 661 |
+
"total_samples": 5192
|
| 662 |
+
},
|
| 663 |
+
"threat": {
|
| 664 |
+
"threshold": 0.4655172413793103,
|
| 665 |
+
"f1_score": 0.6029992790194665,
|
| 666 |
+
"support": 107,
|
| 667 |
+
"total_samples": 5192
|
| 668 |
+
},
|
| 669 |
+
"insult": {
|
| 670 |
+
"threshold": 0.4241379310344827,
|
| 671 |
+
"f1_score": 0.4434943555473952,
|
| 672 |
+
"support": 1509,
|
| 673 |
+
"total_samples": 5192
|
| 674 |
+
},
|
| 675 |
+
"identity_hate": {
|
| 676 |
+
"threshold": 0.6586206896551723,
|
| 677 |
+
"f1_score": 0.4569864410513042,
|
| 678 |
+
"support": 277,
|
| 679 |
+
"total_samples": 5192
|
| 680 |
+
}
|
| 681 |
+
}
|
| 682 |
+
}
|
| 683 |
+
}
|
| 684 |
+
}
|
evaluation_results/eval_20250401_143401/plots/per_class_comparison.png
ADDED
|
evaluation_results/eval_20250401_143401/plots/roc_all_classes.png
ADDED
|
Git LFS Details
|
evaluation_results/eval_20250401_143401/plots/roc_by_language.png
ADDED
|
Git LFS Details
|