File size: 6,412 Bytes
5a18336
17f5d64
 
572f0ae
e6fc8aa
572f0ae
 
 
e6fc8aa
572f0ae
 
 
6273501
5a18336
572f0ae
 
5a18336
572f0ae
 
5a18336
 
572f0ae
 
5a18336
 
be22fa8
 
 
 
 
11a5b62
 
 
 
 
 
 
 
be22fa8
 
11a5b62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be22fa8
 
 
17f5d64
 
 
31487be
17f5d64
 
572f0ae
 
73ec11f
17f5d64
 
572f0ae
be22fa8
5a18336
be22fa8
11a5b62
be22fa8
5a18336
11a5b62
 
 
8b6ab08
11a5b62
 
 
 
572f0ae
8b6ab08
572f0ae
5a18336
572f0ae
 
5a18336
572f0ae
 
 
5a18336
572f0ae
5a18336
572f0ae
5a18336
572f0ae
 
81fc300
11a5b62
572f0ae
 
 
 
 
 
 
 
 
 
 
 
 
 
5a18336
572f0ae
5a18336
ccc828d
572f0ae
 
 
 
 
be22fa8
572f0ae
 
be22fa8
572f0ae
 
5a18336
572f0ae
5a18336
572f0ae
 
 
 
 
 
 
 
be22fa8
572f0ae
 
8b6ab08
572f0ae
8b6ab08
572f0ae
 
 
 
 
 
 
 
be22fa8
572f0ae
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
from transformers import pipeline, AutoTokenizer
from turkish_lm_tuner import T5ForClassification
import os

# Retrieve the Hugging Face authentication token from the environment.
# SECURITY: the token value must never be echoed -- anything written to stdout
# can end up in logs. Only report whether a token is configured.
hf_auth_token = os.getenv('HF_AUTH_TOKEN')
print(f"HF_AUTH_TOKEN configured: {hf_auth_token is not None}")

# Sample inputs, one list per task. Every entry is a row holding a single
# text value.
binary_classification_examples = [
    ["Yahudi terörüne karşı protestolar kararlılıkla devam ediyor."],
]
categorization_examples = [
    ["Ermeni zulmü sırasında hayatını kaybeden kadınlar anısına dikilen anıt ziyarete açıldı."],
]
target_detection_examples = [
    ["Dün 5 bin suriyeli enik doğmuştur zaten Türkiyede 5 bin suriyelinin gitmesi çok çok az"],
]

# Markdown text for the application: an introductory description and a
# placeholder citation note.
APP_DESCRIPTION = """
## Hate Speech Detection in Turkish News

This tool performs hate speech detection across several tasks, including binary classification, categorization, and target detection. Choose a model and input text to analyze its hatefulness, categorize it, or detect targets of hate speech.
"""

APP_CITATION = """
For citation, please refer to the tool's documentation.
"""

# Lookup tables translating raw classifier labels ('LABEL_<n>') into
# human-readable names.

binary_mapping = dict(LABEL_0='non-hateful', LABEL_1='hateful')

# Three-way category scheme. An earlier five-way variant listed
# 'symbolization', 'exaggeration/generalization/attribution/distortion',
# 'swearing/insult/defamation/dehumanization' and
# 'threat of enmity/war/attack/murder/harm' as separate labels; here they are
# merged pairwise into LABEL_1 and LABEL_2.
category_mapping = dict(
    LABEL_0='non-hateful',
    LABEL_1='symbolization/exaggeration/generalization/attribution/distortion',
    LABEL_2='swearing/insult/defamation/dehumanization/threat of enmity/war/attack/murder/harm',
)

# Target groups, listed in label order: position n is the name for 'LABEL_<n>'.
target_mapping = {
    f'LABEL_{position}': group
    for position, group in enumerate((
        'No-group',
        'Refugees',
        'Israel-Jews',
        'Greeks',
        'Armenian',
        'Alevi',
        'Kurdish',
        'Arabian',
        'LGBTI+',
        'Women',
        'Other groups',
    ))
}


# Checkpoint used for T5-based binary classification.
_T5_CHECKPOINT = "gokceuludogan/turna_tr_hateprint_w0.1_new_"

# Holds the loaded (model, tokenizer) pair so the checkpoint is loaded at most
# once per process instead of on every request.
_t5_components = {}


def _load_t5():
    """Return the ``(model, tokenizer)`` pair, loading it on first use."""
    if 'pair' not in _t5_components:
        # Load both parts before caching so a failure never leaves a
        # half-initialised cache behind.
        model = T5ForClassification.from_pretrained(_T5_CHECKPOINT)
        tokenizer = AutoTokenizer.from_pretrained(_T5_CHECKPOINT)
        _t5_components['pair'] = (model, tokenizer)
    return _t5_components['pair']


def inference_t5(input_text, selected_model):
    """Run the T5 classifier on ``input_text`` and return its raw logits.

    Args:
        input_text: Text to classify.
        selected_model: Accepted for interface compatibility but not used;
            the checkpoint is fixed to ``_T5_CHECKPOINT``.

    Returns:
        The ``logits`` attribute of the model output for the tokenized input.
    """
    import torch  # function-scope import: torch is not imported at file level

    model, tokenizer = _load_t5()
    encoded = tokenizer(input_text, return_tensors='pt')
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        return model(**encoded).logits

    
# Functions for model-based tasks

# Pipelines are created lazily and kept here, keyed by model id, so that each
# model is loaded at most once per process rather than on every request.
_PIPELINE_CACHE = {}


def _get_pipeline(model_id):
    """Return the pipeline for ``model_id``, creating it on first use."""
    if model_id not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_id] = pipeline(model=model_id)
    return _PIPELINE_CACHE[model_id]


def _classify(model_id, input_text, label_names):
    """Classify ``input_text`` with ``model_id`` and translate the first label.

    Returns the entry of ``label_names`` for the predicted label, or
    ``'error'`` when the label is not present in ``label_names``.
    """
    first_prediction = _get_pipeline(model_id)(input_text)[0]
    return label_names.get(first_prediction['label'], 'error')


def perform_binary_classification(input_text, selected_model):
    """Classify ``input_text`` as hateful / non-hateful with the chosen model.

    Args:
        input_text: Text to classify.
        selected_model: Model name without the ``gokceuludogan/`` prefix.
            Names containing ``'turna'`` are routed to ``inference_t5``.

    Returns:
        A value from ``binary_mapping``, or ``'error'`` for an unknown label.

    Raises:
        gr.Error: If no model is selected.
    """
    if not selected_model:
        # Previously this fell through and tried to load 'gokceuludogan/None'.
        raise gr.Error("Please select a model.")

    if 'turna' in selected_model:
        logits = inference_t5(input_text, selected_model)
        # Convert the logits to a label so both model families return the
        # same kind of result (previously the raw tensor was returned).
        # NOTE(review): assumes one row of per-class scores where class index
        # i corresponds to 'LABEL_i' -- confirm against the checkpoint.
        predicted_index = int(logits.argmax(dim=-1).reshape(-1)[0])
        return binary_mapping.get(f'LABEL_{predicted_index}', 'error')

    return _classify(f'gokceuludogan/{selected_model}', input_text, binary_mapping)


def perform_categorization(input_text):
    """Return the hate speech category of ``input_text`` (see ``category_mapping``)."""
    return _classify(
        'gokceuludogan/berturk_tr_hateprint_cat_class_w0.1_b128',
        input_text,
        category_mapping,
    )


# An earlier variant of target detection used the text-generation model
# 'gokceuludogan/turna_generation_tr_hateprint_target'; the classifier below
# replaced it.
def perform_target_detection(input_text):
    """Return the targeted group for ``input_text`` (see ``target_mapping``)."""
    return _classify(
        'gokceuludogan/berturk_tr_hateprint_target_class_w0.1',
        input_text,
        target_mapping,
    )


def perform_multi_detection(input_text):
    """Return the text generated by the multi-task model for ``input_text``."""
    generator = _get_pipeline('gokceuludogan/turna_generation_tr_hateprint_multi')
    return generator(input_text)[0]['generated_text']

# Gradio interface: one tab per task, each wiring a text box and a button to
# the matching perform_* function.
with gr.Blocks(theme="abidlabs/Lime") as hate_speech_demo:

    # Main description
    with gr.Tab("About"):
        gr.Markdown(APP_DESCRIPTION)

    # Binary Classification Tab
    with gr.Tab("Binary Classification"):
        gr.Markdown("Analyze the hatefulness of a given text using selected models.")
        with gr.Column():
            model_choice_binary = gr.Radio(
                choices=[
                    "turna_tr_hateprint_w0.1_new_",
                    "berturk_tr_hateprint_w0.1_b128_v2",
                ],
                label="Select Model",
                # The default must be one of `choices`; the previous default
                # ("turna_tr_hateprint") matched neither option.
                value="turna_tr_hateprint_w0.1_new_",
            )
            text_input_binary = gr.Textbox(label="Input Text")
            classify_button = gr.Button("Analyze")
            classification_output = gr.Textbox(label="Classification Result")
            classify_button.click(
                perform_binary_classification,
                inputs=[text_input_binary, model_choice_binary],
                outputs=classification_output,
            )

    # Hate Speech Categorization Tab
    with gr.Tab("Hate Speech Categorization"):
        gr.Markdown("Categorize the hate speech type in the provided text.")
        with gr.Column():
            text_input_category = gr.Textbox(label="Input Text")
            categorize_button = gr.Button("Categorize")
            categorization_output = gr.Textbox(label="Categorization Result")
            categorize_button.click(
                perform_categorization,
                inputs=[text_input_category],
                outputs=categorization_output,
            )

    # Target Detection Tab
    with gr.Tab("Target Detection"):
        gr.Markdown("Detect the targets of hate speech in the provided text.")
        with gr.Column():
            text_input_target = gr.Textbox(label="Input Text")
            target_button = gr.Button("Detect Targets")
            target_output = gr.Textbox(label="Target Detection Result")
            target_button.click(
                perform_target_detection,
                inputs=[text_input_target],
                outputs=target_output,
            )

    # Multi Detection Tab
    with gr.Tab("Multi Detection"):
        gr.Markdown("Detect hate speech, its category, and its targets in the text.")
        with gr.Column():
            text_input_multi = gr.Textbox(label="Input Text")
            multi_button = gr.Button("Detect All")
            multi_output = gr.Textbox(label="Multi Detection Result")
            multi_button.click(
                perform_multi_detection,
                inputs=[text_input_multi],
                outputs=multi_output,
            )

    # Citation Section (rendered below the tabs)
    gr.Markdown(APP_CITATION)

# Launch the application only when this file is run as a script, so that
# importing the module (e.g. from tests or tooling) does not start a server.
if __name__ == "__main__":
    hate_speech_demo.launch()