import gradio as gr
import tensorflow as tf
from transformers import AutoTokenizer, TFBertModel, TFXLMRobertaModel  # TF*Model classes are only needed by the commented-out keras loader below
from huggingface_hub import from_pretrained_keras


app_title = "Portuguese Counter Hate Speech Detection"

app_description = """
This prototype from the kNOwHATE project aims to classify a Portuguese target sentence as hate speech, counter hate speech, or neutral, given another sentence as context.

We collected 24,739 YouTube comments and 29,846 tweets, had them annotated by experts, and trained our prototype on this data.

We invite you to try it out: enter a pair of sentences below, one as target and the other as context, and submit it to see whether the target is hate speech, counter hate speech, or neutral relative to the context.

For more, visit our [website](https://knowhate.eu) and [Hugging Face page](https://huggingface.co/knowhate).
"""

def_model = 'knowhate/counterhate-twitter-bertimbau'

model_list = [
    def_model, 
    "knowhate/counterhate-twitter-xlmrobertabase",
    "knowhate/counterhate-twitter-bertbasemultilingualcased",
    "knowhate/counterhate-twitter-hateberttuga",
    "knowhate/counterhate-youtube-hateberttuga",
    "knowhate/counterhate-youtube-bertimbau"
]

kw_to_hf = {"knowhate/counterhate-twitter-bertimbau": "neuralmind/bert-base-portuguese-cased", 
            "knowhate/counterhate-youtube-bertimbau": "neuralmind/bert-base-portuguese-cased", 
            "knowhate/counterhate-twitter-xlmrobertabase": "xlm-roberta-base", 
            "knowhate/counterhate-twitter-bertbasemultilingualcased": "bert-base-multilingual-cased", 
            "knowhate/counterhate-youtube-hateberttuga": "knowhate/hateberttuga",
            "knowhate/counterhate-twitter-hateberttuga": "knowhate/hateberttuga"}

# Expected labels for the examples below (0 = neutral, 1 = counter speech, 2 = hate speech): 1, 0, 2
app_examples = [
    ["Essa gente tem é de deixar de ser apaparicada pelo Estado e começar a cumprir os seus deveres como cidadãos", 
     "Nepia o que faz com que as pessoas generalizem é o ódio intrínseco que têm contra uma etnia, ng é responsável pela sua xenofobia", 
     def_model],
    ["Nem vou comentar o hate e misoginia que tenho visto aqui no tt em relação à Anitta", 
     "E xenofobia também. Tugas no seu melhor", 
     def_model],
    ["A Festa tá no Climax, chama o zuca pra Dançar.", 
     "Já reparaste no contador da luz? Vai trabalhar malandro", 
     def_model]
]

def predict(context, target, chosen_model):
    # Load the fine-tuned Keras classifier from the Hugging Face Hub.
    # model1 = tf.keras.models.load_model(chosen_model, custom_objects={"TFBertModel": TFBertModel})
    model1 = from_pretrained_keras(chosen_model)

    # Map the classifier to the backbone whose tokenizer it was trained with,
    # e.g. "neuralmind/bert-base-portuguese-cased" for the BERTimbau models.
    checkpoint = kw_to_hf[chosen_model]
    if '/' in checkpoint:
        # Checkpoints with an org prefix get an explicit 512-token max length;
        # the bare Hub models (xlm-roberta-base, bert-base-multilingual-cased) use their defaults.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True, model_max_length=512)
    else:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True)

    # Encode the (context, target) pair as a single padded sequence.
    tokpair = tokenizer(context, target, truncation=True, padding='max_length',
                        return_tensors='tf', return_token_type_ids=False)

    # Run the saved model's serving signature and pull the class scores
    # for the first (and only) item in the batch out of the 'outp' tensor.
    outp = model1.signatures["serving_default"](**tokpair)
    proto_tensor = tf.make_tensor_proto(outp['outp'])
    allscores = tf.make_ndarray(proto_tensor)[0]

    # Index order matches the label encoding: 0 = neutral, 1 = counter speech, 2 = hate speech.
    scores_dict = {
        'Neutral': allscores[0],
        'Counter Speech': allscores[1],
        'Hate Speech': allscores[2]
    }

    return scores_dict

inputs = [
    gr.Textbox(label="Context", value= app_examples[0][0]),
    gr.Textbox(label="Target", value= app_examples[0][1]),
    gr.Dropdown(label="Model", choices=model_list, value=model_list[0])
]

outputs = [
    gr.Label(label="Result"),
]

gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title, 
             description=app_description, examples=app_examples, theme=gr.themes.Base(primary_hue="red")).launch()
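
# A minimal programmatic sanity check (a sketch, not part of the Space UI): it reuses the
# first example pair and assumes the Hub checkpoints above are reachable.
#
# scores = predict(app_examples[0][0], app_examples[0][1], def_model)
# print(scores)  # dict mapping 'Neutral', 'Counter Speech' and 'Hate Speech' to scores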