pfialho's picture
Update app.py
72a8b62 verified
raw
history blame
3.84 kB
import gradio as gr
from transformers import TFBertModel, TFXLMRobertaModel
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer
from huggingface_hub import from_pretrained_keras
# ---------------------------------------------------------------------------
# Static configuration: UI texts, selectable models, tokenizer checkpoints and
# the example rows offered in the interface.
# ---------------------------------------------------------------------------

# Title shown at the top of the page.
app_title = "Portuguese Counter Hate Speech Detection"

# Markdown description rendered below the title.
app_description = """
This prototype from the kNOwHATE project aims to classify a Portuguese target sentence as either hate speech, counter hate speech or neutral, considering another sentence as context.
We collected 24,739 YouTube comments and 29,846 tweets, annotated by experts, and trained our prototype on this data.
We invite you to try it out. You can just enter a pair of sentences below, one as target and another as context, and submit it to see if the target is either hate speech, counter hate speech or neutral, relative to the context.
For more, visit our [website](https://knowhate.eu) and [Hugging Face page](https://huggingface.co/knowhate).
"""

# Model used for the bundled examples.
def_model = "knowhate/counterhate-youtube-hateberttuga"

# Models offered for selection. Other fine-tuned models that have an entry in
# kw_to_hf but are not offered here:
#   knowhate/counterhate-twitter-xlmrobertabase
#   knowhate/counterhate-twitter-bertbasemultilingualcased
#   knowhate/counterhate-twitter-hateberttuga
model_list = [
    def_model,
    "knowhate/counterhate-youtube-bertimbau",
]

# Base checkpoints shared by several fine-tuned models.
_BERTIMBAU = "neuralmind/bert-base-portuguese-cased"
_HATEBERTTUGA = "knowhate/hateberttuga"

# Maps each fine-tuned model repository to the checkpoint whose tokenizer is
# loaded for it.
kw_to_hf = {
    "knowhate/counterhate-twitter-bertimbau": _BERTIMBAU,
    "knowhate/counterhate-youtube-bertimbau": _BERTIMBAU,
    "knowhate/counterhate-twitter-xlmrobertabase": "xlm-roberta-base",
    "knowhate/counterhate-twitter-bertbasemultilingualcased": "bert-base-multilingual-cased",
    "knowhate/counterhate-youtube-hateberttuga": _HATEBERTTUGA,
    "knowhate/counterhate-twitter-hateberttuga": _HATEBERTTUGA,
}

# Example rows; each row is [first sentence, second sentence, model].
# NOTE(review): the original comment here read "1 0 2" - presumably the
# expected class index of each of the three examples; confirm with the author.
app_examples = [
    [
        "Tudo apoiantes do lula livre que o bloco de esterco anda a importar para cá.",
        "Sim, têm um presidente ditador. E se houver muita gente a pensar como o senhor, aqui acontecerá a mesma coisa.",
        def_model,
    ],
    [
        '"Não acredites em tudo o que lês na Internet" - Abraham Lincoln',
        "A Internet foi desenvolvida entre os anos 1973-1989.",
        def_model,
    ],
    [
        "Então o Marcelo foi ao Qatar para 'falar de direitos humanos', mas não foi a Odemira?",
        "esse retardado mental, foi a praia do katar, la tem a agua mais kentinha.",
        def_model,
    ],
]
# Loaded (model, tokenizer) pairs keyed by model repository id, so each model
# is fetched and deserialized once per process instead of on every request.
# No lock is taken: two simultaneous first requests for the same model may both
# load it, and the later assignment simply replaces the earlier one.
_loaded_models = {}


def _load_model_and_tokenizer(chosen_model):
    """Return the (model, tokenizer) pair for *chosen_model*, loading it on first use.

    Raises KeyError if *chosen_model* has no entry in ``kw_to_hf``.
    """
    if chosen_model not in _loaded_models:
        checkpoint = kw_to_hf[chosen_model]
        # Checkpoints published under a namespace ("org/name") are given an
        # explicit 512-token limit; the others use the tokenizer's own limit.
        # NOTE(review): presumably the namespaced tokenizer configs do not
        # declare a maximum length, which padding='max_length' needs - confirm.
        if '/' in checkpoint:
            tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True, model_max_length=512)
        else:
            tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True)
        model = from_pretrained_keras(chosen_model)
        _loaded_models[chosen_model] = (model, tokenizer)
    return _loaded_models[chosen_model]


def predict(text, target, chosen_model):
    """Score a sentence pair with the selected model.

    Args:
        text: First sentence of the pair (bound to the "Context" textbox).
        target: Second sentence of the pair (bound to the "Target" textbox).
        chosen_model: Repository id of the fine-tuned model; must be a key of
            ``kw_to_hf``.

    Returns:
        A dict mapping 'Neutral', 'Counter Speech' and 'Hate Speech' to the
        model's output values at indices 0, 1 and 2 respectively, as floats.

    Raises:
        gr.Error: If *chosen_model* is not a known model id.
    """
    # Validate before touching the Hub so that an arbitrary repository id is
    # never downloaded and loaded.
    if chosen_model not in kw_to_hf:
        raise gr.Error(f"Unknown model: {chosen_model!r}")

    model1, tokenizer = _load_model_and_tokenizer(chosen_model)

    # Encode the two sentences as one pair, truncated/padded to the
    # tokenizer's maximum length; token type ids are not passed to the model.
    tokpair = tokenizer(text, target, truncation=True, padding='max_length', return_tensors='tf', return_token_type_ids=False)
    outp = model1.signatures["serving_default"](**tokpair)

    # The batch holds a single pair; take its row of scores.
    allscores = outp['outp'].numpy()[0]

    # Plain Python floats keep the result independent of NumPy scalar types.
    return {
        'Neutral': float(allscores[0]),
        'Counter Speech': float(allscores[1]),
        'Hate Speech': float(allscores[2]),
    }
# The input widgets are listed in the positional order in which predict()
# receives their values. The two textboxes start out filled with the first
# example row.
_first_example = app_examples[0]
inputs = [
    gr.Textbox(label="Context", value=_first_example[0]),
    gr.Textbox(label="Target", value=_first_example[1]),
    gr.Dropdown(label="Model", choices=model_list, value=model_list[0]),
]

# A single label widget displays the dict returned by predict().
outputs = [gr.Label(label="Result")]

# Build the interface and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
    theme=gr.themes.Base(primary_hue="red"),
)
demo.launch()