# Hugging Face Spaces status header (scrape artifact): "Spaces: Running"
import json
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import gradio as gr

# BERT fine-tuned for multi-label English text moderation.
model_name = "ifmain/ModerationBERT-En-02"
tokenizer = BertTokenizer.from_pretrained(model_name)
# num_labels=18 must match len(categories) below — one sigmoid output per category.
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=18)

# Use the GPU when one is available; otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Label names in the model's output order. The mix of underscore and slash
# variants (e.g. 'self_harm' and 'self-harm') presumably mirrors the model's
# own label set — TODO confirm against the model card before renaming.
categories = [
    'harassment', 'harassment_threatening', 'hate', 'hate_threatening',
    'self_harm', 'self_harm_instructions', 'self_harm_intent', 'sexual',
    'sexual_minors', 'violence', 'violence_graphic', 'self-harm',
    'sexual/minors', 'hate/threatening', 'violence/graphic',
    'self-harm/intent', 'self-harm/instructions', 'harassment/threatening'
]
def predict_moderation(text):
    """Score *text* against every moderation category.

    Returns a tuple ``(category_scores, detected)`` where
    ``category_scores`` maps each category name to its sigmoid
    probability (float), and ``detected`` is the string ``"True"`` or
    ``"False"`` depending on whether any probability exceeds 0.5.
    """
    encoding = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=128,  # longer inputs are truncated to 128 tokens
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    model.eval()  # disable dropout etc. for deterministic inference
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Multi-label task: independent sigmoid per category (not softmax).
    probs = torch.sigmoid(outputs.logits)[0].cpu().numpy()

    category_scores = {cat: float(p) for cat, p in zip(categories, probs)}
    detected = any(p > 0.5 for p in probs)
    return category_scores, str(detected)
# Gradio UI: one free-text input; outputs per-category scores and an
# overall "violation detected" verdict.
iface = gr.Interface(
    fn=predict_moderation,
    inputs=gr.Textbox(label="Enter text"),
    outputs=[
        gr.Label(label="Ratings by category"),
        gr.Label(label="Was a violation detected?")
    ],
    title="Text moderation",
    description="Enter text to check it for content violations (ModerationBERT-En-02 model)."
)

iface.launch()