Spaces:
Running
Running
File size: 6,412 Bytes
5a18336 17f5d64 572f0ae e6fc8aa 572f0ae e6fc8aa 572f0ae 6273501 5a18336 572f0ae 5a18336 572f0ae 5a18336 572f0ae 5a18336 be22fa8 11a5b62 be22fa8 11a5b62 be22fa8 17f5d64 31487be 17f5d64 572f0ae 73ec11f 17f5d64 572f0ae be22fa8 5a18336 be22fa8 11a5b62 be22fa8 5a18336 11a5b62 8b6ab08 11a5b62 572f0ae 8b6ab08 572f0ae 5a18336 572f0ae 5a18336 572f0ae 5a18336 572f0ae 5a18336 572f0ae 5a18336 572f0ae 81fc300 11a5b62 572f0ae 5a18336 572f0ae 5a18336 ccc828d 572f0ae be22fa8 572f0ae be22fa8 572f0ae 5a18336 572f0ae 5a18336 572f0ae be22fa8 572f0ae 8b6ab08 572f0ae 8b6ab08 572f0ae be22fa8 572f0ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import gradio as gr
from transformers import pipeline, AutoTokenizer
from turkish_lm_tuner import T5ForClassification
import os
# Retrieve the Hugging Face authentication token from the environment.
hf_auth_token = os.getenv('HF_AUTH_TOKEN')
# Never echo the token itself: anything printed here lands in the process
# output/logs. Report only whether a token was configured.
print('HF_AUTH_TOKEN is set' if hf_auth_token else 'HF_AUTH_TOKEN is not set')
# Sample inputs, one sentence per task. Each is a single-row, single-column
# table (a list containing one list with one string).
binary_classification_examples = [
    ["Yahudi terörüne karşı protestolar kararlılıkla devam ediyor."],
]
categorization_examples = [
    ["Ermeni zulmü sırasında hayatını kaybeden kadınlar anısına dikilen anıt ziyarete açıldı."],
]
target_detection_examples = [
    ["Dün 5 bin suriyeli enik doğmuştur zaten Türkiyede 5 bin suriyelinin gitmesi çok çok az"],
]

# Markdown shown on the "About" tab. The leading and trailing newlines are
# part of the value.
APP_DESCRIPTION = (
    "\n"
    "## Hate Speech Detection in Turkish News\n"
    "This tool performs hate speech detection across several tasks, "
    "including binary classification, categorization, and target detection. "
    "Choose a model and input text to analyze its hatefulness, categorize it, "
    "or detect targets of hate speech.\n"
)

# Markdown shown in the citation section (placeholder text).
APP_CITATION = (
    "\n"
    "For citation, please refer to the tool's documentation.\n"
)
# Lookup tables that turn a classifier label id ('LABEL_<index>') into a
# human-readable name. Each table is built from an ordered tuple of names,
# so a name's position in the tuple is its label index.

binary_mapping = {
    f'LABEL_{index}': name
    for index, name in enumerate(('non-hateful', 'hateful'))
}

# Three-way category scheme. An earlier five-label variant listed
# 'symbolization', 'exaggeration/generalization/attribution/distortion',
# 'swearing/insult/defamation/dehumanization' and
# 'threat of enmity/war/attack/murder/harm' as separate classes (LABEL_1-4);
# here they appear pairwise joined into LABEL_1 and LABEL_2.
category_mapping = {
    f'LABEL_{index}': name
    for index, name in enumerate((
        'non-hateful',
        'symbolization/exaggeration/generalization/attribution/distortion',
        'swearing/insult/defamation/dehumanization/threat of enmity/war/attack/murder/harm',
    ))
}

target_mapping = {
    f'LABEL_{index}': name
    for index, name in enumerate((
        'No-group',
        'Refugees',
        'Israel-Jews',
        'Greeks',
        'Armenian',
        'Alevi',
        'Kurdish',
        'Arabian',
        'LGBTI+',
        'Women',
        'Other groups',
    ))
}
# Checkpoint used by inference_t5, plus the lazily built (model, tokenizer)
# pair so the weights are loaded once per process instead of on every request.
_T5_CHECKPOINT = "gokceuludogan/turna_tr_hateprint_w0.1_new_"
_t5_bundle = None


def inference_t5(input_text, selected_model):
    """Run the TURNA classifier on ``input_text`` and return its raw logits.

    Args:
        input_text: Text to classify.
        selected_model: Accepted so the signature matches what the caller
            passes; the checkpoint is fixed to ``_T5_CHECKPOINT`` and this
            argument is not used to pick a model.

    Returns:
        The ``logits`` attribute of the model output. The tokenizer is asked
        for PyTorch tensors (``return_tensors='pt'``).
    """
    global _t5_bundle
    if _t5_bundle is None:
        # First call only: load the model and tokenizer, then reuse them.
        _t5_bundle = (
            T5ForClassification.from_pretrained(_T5_CHECKPOINT),
            AutoTokenizer.from_pretrained(_T5_CHECKPOINT),
        )
    model, tokenizer = _t5_bundle
    encoded = tokenizer(input_text, return_tensors='pt')
    return model(**encoded).logits
# Functions for model-based tasks

# Pipelines already built by perform_binary_classification, keyed by model
# name, so each model is loaded once per process rather than on every click.
_binary_pipelines = {}


def perform_binary_classification(input_text, selected_model):
    """Classify ``input_text`` as hateful or non-hateful.

    Args:
        input_text: Text to classify.
        selected_model: Model name without the ``gokceuludogan/`` prefix.
            Names containing ``'turna'`` are routed to ``inference_t5``;
            any other name is loaded as a ``pipeline``.

    Returns:
        The ``binary_mapping`` entry for the predicted label, or ``'error'``
        when the label is not in the mapping.

    Raises:
        gr.Error: If no model is selected.
    """
    if not selected_model:
        # Without a name the repo id below would be invalid; tell the user.
        raise gr.Error("Please select a model.")

    if 'turna' in selected_model:
        logits = inference_t5(input_text, selected_model)
        # Return a label like the pipeline branch does, not the raw tensor.
        # Assumes logits is (1, num_labels) and that class index i
        # corresponds to 'LABEL_i' -- TODO confirm against the checkpoint.
        predicted = int(logits.argmax(dim=-1).item())
        return binary_mapping.get(f'LABEL_{predicted}', 'error')

    classifier = _binary_pipelines.get(selected_model)
    if classifier is None:
        classifier = pipeline(model=f'gokceuludogan/{selected_model}')
        _binary_pipelines[selected_model] = classifier
    return binary_mapping.get(classifier(input_text)[0]['label'], 'error')
# Lazily built categorization pipeline; loaded on first use and then reused
# so the model is not reloaded for every request.
_category_pipeline = None


def perform_categorization(input_text):
    """Return the hate speech category predicted for ``input_text``.

    Args:
        input_text: Text to categorize.

    Returns:
        The ``category_mapping`` entry for the label of the first prediction,
        or ``'error'`` when that label is not in the mapping.
    """
    global _category_pipeline
    if _category_pipeline is None:
        _category_pipeline = pipeline(
            model='gokceuludogan/berturk_tr_hateprint_cat_class_w0.1_b128'
        )
    prediction = _category_pipeline(input_text)[0]
    return category_mapping.get(prediction['label'], 'error')
# Lazily built target-classification pipeline; loaded on first use and then
# reused so the model is not reloaded for every request.
# NOTE: an earlier variant of this function returned the 'generated_text' of
# 'gokceuludogan/turna_generation_tr_hateprint_target' instead of mapping a
# classification label.
_target_pipeline = None


def perform_target_detection(input_text):
    """Return the targeted group predicted for ``input_text``.

    Args:
        input_text: Text to analyze.

    Returns:
        The ``target_mapping`` entry for the label of the first prediction,
        or ``'error'`` when that label is not in the mapping.
    """
    global _target_pipeline
    if _target_pipeline is None:
        _target_pipeline = pipeline(
            model='gokceuludogan/berturk_tr_hateprint_target_class_w0.1'
        )
    prediction = _target_pipeline(input_text)[0]
    return target_mapping.get(prediction['label'], 'error')
# Lazily built multi-task pipeline; loaded on first use and then reused so
# the model is not reloaded for every request.
_multi_pipeline = None


def perform_multi_detection(input_text):
    """Return the text produced by the multi-task model for ``input_text``.

    Args:
        input_text: Text to analyze.

    Returns:
        The ``'generated_text'`` field of the first pipeline result, returned
        as-is with no label mapping applied.
    """
    global _multi_pipeline
    if _multi_pipeline is None:
        _multi_pipeline = pipeline(
            model='gokceuludogan/turna_generation_tr_hateprint_multi'
        )
    return _multi_pipeline(input_text)[0]['generated_text']
# Gradio interface: an "About" tab plus one tab per task. Each task tab wires
# an "Input Text" box and a button to the matching perform_* function and
# shows the returned string in an output textbox.
with gr.Blocks(theme="abidlabs/Lime") as hate_speech_demo:
    # Main description
    with gr.Tab("About"):
        gr.Markdown(APP_DESCRIPTION)

    # Binary Classification Tab
    with gr.Tab("Binary Classification"):
        gr.Markdown("Analyze the hatefulness of a given text using selected models.")
        with gr.Column():
            model_choice_binary = gr.Radio(
                choices=[
                    "turna_tr_hateprint_w0.1_new_",
                    "berturk_tr_hateprint_w0.1_b128_v2",
                ],
                label="Select Model",
                # The default must be one of `choices`; the previous default
                # ("turna_tr_hateprint") matched neither entry.
                value="turna_tr_hateprint_w0.1_new_",
            )
            text_input_binary = gr.Textbox(label="Input Text")
            classify_button = gr.Button("Analyze")
            classification_output = gr.Textbox(label="Classification Result")
        classify_button.click(
            perform_binary_classification,
            inputs=[text_input_binary, model_choice_binary],
            outputs=classification_output,
        )

    # Hate Speech Categorization Tab
    with gr.Tab("Hate Speech Categorization"):
        gr.Markdown("Categorize the hate speech type in the provided text.")
        with gr.Column():
            text_input_category = gr.Textbox(label="Input Text")
            categorize_button = gr.Button("Categorize")
            categorization_output = gr.Textbox(label="Categorization Result")
        categorize_button.click(
            perform_categorization,
            inputs=[text_input_category],
            outputs=categorization_output,
        )

    # Target Detection Tab
    with gr.Tab("Target Detection"):
        gr.Markdown("Detect the targets of hate speech in the provided text.")
        with gr.Column():
            text_input_target = gr.Textbox(label="Input Text")
            target_button = gr.Button("Detect Targets")
            target_output = gr.Textbox(label="Target Detection Result")
        target_button.click(
            perform_target_detection,
            inputs=[text_input_target],
            outputs=target_output,
        )

    # Multi Detection Tab
    with gr.Tab("Multi Detection"):
        gr.Markdown("Detect hate speech, its category, and its targets in the text.")
        with gr.Column():
            text_input_multi = gr.Textbox(label="Input Text")
            multi_button = gr.Button("Detect All")
            multi_output = gr.Textbox(label="Multi Detection Result")
        multi_button.click(
            perform_multi_detection,
            inputs=[text_input_multi],
            outputs=multi_output,
        )

    # Citation Section
    gr.Markdown(APP_CITATION)

# Launch the application
hate_speech_demo.launch()
|