File size: 3,810 Bytes
8096aaf
 
 
f89154a
 
8096aaf
25f03e0
 
8096aaf
 
a3271fa
 
8096aaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from collections import Counter
from scipy.special import softmax

# ---------------------------------------------------------------------------
# Static UI text
# ---------------------------------------------------------------------------

# HTML footer rendered below the interface.
article_string = (
    'Author: <a href="https://huggingface.co/knowhate">kNOwHATE</a>. '
    'Read more about our <a href="https://knowhate.eu/pt-pt">'
    "research on the evaluation of Portuguese language models</a>."
)

app_title = "Portuguese Hate Speech Detection"

# The leading " \n" and the trailing "\n" are part of the displayed text.
app_description = (
    " \n"
    "This app detects hate speech on Portuguese text using multiple models. "
    'You can either introduce your own sentences by filling in "Text" or '
    "click on one of the examples provided below.\n"
)

# One single-element row per example sentence.
app_examples = [
    [sentence]
    for sentence in (
        "as pessoas tem que perceber que ser 'panasca' não é deixar de ser homem, é deixar de ser humano kkk",
        "ontem encontrei-me com um amigo meu e tivemos uma conversa agradável",
    )
]

output_textbox_component_description = (
    "\n"
    "This box will display the hate speech detection results based on "
    "the average score of multiple models.\n"
)

output_json_component_description = {
    "breakdown": (
        "\n"
        "This box presents a detailed breakdown of the evaluation for each model.\n"
    ),
}

# ---------------------------------------------------------------------------
# Label text, keyed by class index
# ---------------------------------------------------------------------------

short_score_descriptions = dict(enumerate(("Non Hate Speech", "Hate Speech")))

score_descriptions = dict(
    enumerate(("This text is not Hate Speech.", "This text is Hate Speech."))
)

# ---------------------------------------------------------------------------
# Model identifiers and their display names
# ---------------------------------------------------------------------------

# Hub checkpoint id -> name shown in the model dropdown.
user_friendly_name = {
    "knowhate/HateBERTimbau": "HateBERTimbau (Original)",
    "knowhate/HateBERTimbau-youtube": "HateBERTimbau (YouTube)",
    "knowhate/HateBERTimbau-twitter": "HateBERTimbau (Twitter)",
    "knowhate/HateBERTimbau-yt-tt": "HateBERTimbau (YouTube + Twitter)",
}

# Checkpoint ids in the same order as the mapping above.
model_list = list(user_friendly_name)

# Display name -> hub checkpoint id.
reverse_user_friendly_name = dict(
    zip(user_friendly_name.values(), user_friendly_name)
)

user_friendly_name_list = [*user_friendly_name.values()]

# Load the tokenizer and the sequence-classification model for every
# checkpoint in ``model_list`` at import time. Each entry of ``model_array``
# is a dict with the keys "name", "tokenizer" and "model".
model_array = []

for model_name in model_list:
    row = {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    model_array.append(row)
 
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    When several elements share the highest count, the one encountered
    first wins. An empty *array* raises ``IndexError``.
    """
    top_entries = Counter(array).most_common(1)
    winner, _ = top_entries[0]
    return winner


# Portuguese counterparts of ``score_descriptions``, keyed by class index.
# ``predict`` referenced this table without it ever being defined, which made
# every prediction fail with a NameError.
score_descriptions_pt = {
    0: "Este texto não é Discurso de Ódio.",
    1: "Este texto é Discurso de Ódio.",
}


def predict(s1, chosen_model):
    """Classify *s1* with the selected model.

    Args:
        s1: The text to classify.
        chosen_model: Display name of the model, i.e. one of the values in
            ``user_friendly_name_list``. A falsy value selects the first
            entry of that list.

    Returns:
        A ``(scores, markdown_description)`` tuple. ``scores`` maps each
        short label from ``short_score_descriptions`` to its softmax
        probability; ``markdown_description`` is the English verdict for the
        highest-scoring class followed by the Portuguese one.

    Raises:
        KeyError: If *chosen_model* is not a known display name.
        ValueError: If the selected model is missing from ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Find the pre-loaded tokenizer/model pair for the selected checkpoint.
    row = next(
        (entry for entry in model_array if entry["name"] == full_chosen_model_name),
        None,
    )
    if row is None:
        # Previously this fell through and crashed on an unbound ``logits``.
        raise ValueError(f"Model {full_chosen_model_name!r} is not loaded.")

    # ``truncation=True`` asks the tokenizer to clip the text to its
    # configured maximum length, so very long input is not passed to the
    # model at full length.
    model_input = row["tokenizer"](
        [s1], padding=True, truncation=True, return_tensors="pt"
    )
    with torch.no_grad():
        output = row["model"](**model_input)
        # Single-item batch: take its logits row and normalise it.
        probabilities = softmax(output.logits[0].numpy()).tolist()

    max_pos = probabilities.index(max(probabilities))
    markdown_description = (
        score_descriptions[max_pos] + "\n \n" + score_descriptions_pt[max_pos]
    )
    scores = {
        short_score_descriptions[idx]: probability
        for idx, probability in enumerate(probabilities)
    }

    return scores, markdown_description


# Input widgets: the text to classify (pre-filled with the first example)
# and the model selector (defaulting to the first display name).
inputs = [
    gr.Textbox(value=app_examples[0][0], label="Text"),
    gr.Dropdown(
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
        label="Model",
    ),
]

# Output widgets, in the order ``predict`` returns its values:
# the per-label scores, then the Markdown verdict.
outputs = [gr.Label(label="Result"), gr.Markdown()]


# Build the interface and start serving it.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    article=article_string,
    examples=app_examples,
).launch()