Spaces:

gokceuludogan
/

TRHatePrint

Running

File size: 6,412 Bytes

5a18336
17f5d64
 
572f0ae
e6fc8aa
572f0ae
 
 
e6fc8aa
572f0ae
 
 
6273501
5a18336
572f0ae
 
5a18336
572f0ae
 
5a18336
 
572f0ae
 
5a18336
 
be22fa8
 
 
 
 
11a5b62
 
 
 
 
 
 
 
be22fa8
 
11a5b62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be22fa8
 
 
17f5d64
 
 
31487be
17f5d64
 
572f0ae
 
73ec11f
17f5d64
 
572f0ae
be22fa8
5a18336
be22fa8
11a5b62
be22fa8
5a18336
11a5b62
 
 
8b6ab08
11a5b62
 
 
 
572f0ae
8b6ab08
572f0ae
5a18336
572f0ae
 
5a18336
572f0ae
 
 
5a18336
572f0ae
5a18336
572f0ae
5a18336
572f0ae
 
81fc300
11a5b62
572f0ae
 
 
 
 
 
 
 
 
 
 
 
 
 
5a18336
572f0ae
5a18336
ccc828d
572f0ae
 
 
 
 
be22fa8
572f0ae
 
be22fa8
572f0ae
 
5a18336
572f0ae
5a18336
572f0ae
 
 
 
 
 
 
 
be22fa8
572f0ae
 
8b6ab08
572f0ae
8b6ab08
572f0ae
 
 
 
 
 
 
 
be22fa8
572f0ae

import gradio as gr
from transformers import pipeline, AutoTokenizer
from turkish_lm_tuner import T5ForClassification
import os

# Retrieve the Hugging Face authentication token from the environment.
# SECURITY: the token is a secret, so never write its value to stdout/logs;
# only report whether one was found.
hf_auth_token = os.getenv('HF_AUTH_TOKEN')
print('HF_AUTH_TOKEN is set' if hf_auth_token else 'HF_AUTH_TOKEN is not set')

# Sample inputs, one collection per task. Each collection is a list of rows
# and every row carries a single text field.
binary_classification_examples = [
    ["Yahudi terörüne karşı protestolar kararlılıkla devam ediyor."],
]
categorization_examples = [
    ["Ermeni zulmü sırasında hayatını kaybeden kadınlar anısına dikilen anıt ziyarete açıldı."],
]
target_detection_examples = [
    ["Dün 5 bin suriyeli enik doğmuştur zaten Türkiyede 5 bin suriyelinin gitmesi çok çok az"],
]

# Markdown text rendered in the "About" tab of the interface.
APP_DESCRIPTION = """
## Hate Speech Detection in Turkish News

This tool performs hate speech detection across several tasks, including binary classification, categorization, and target detection. Choose a model and input text to analyze its hatefulness, categorize it, or detect targets of hate speech.
"""

# Markdown text rendered below the tabs. Currently a placeholder that points
# readers to the tool's documentation rather than giving an actual citation.
APP_CITATION = """
For citation, please refer to the tool's documentation.
"""

# Translates the raw 'LABEL_<id>' names reported by the binary classifier
# into the labels shown to the user.
binary_mapping = dict(
    LABEL_0='non-hateful',
    LABEL_1='hateful',
)

# category_mapping = {
#     'LABEL_0': 'non-hateful', 
#     'LABEL_1': 'symbolization', 
#     'LABEL_2': 'exaggeration/generalization/attribution/distortion', 
#     'LABEL_3': 'swearing/insult/defamation/dehumanization',      
#     'LABEL_4': 'threat of enmity/war/attack/murder/harm',     
# }

# Display names for the categorization classifier, listed in class-id order:
# the name at position i is stored under the key 'LABEL_<i>'.
category_mapping = {
    f'LABEL_{class_id}': display_name
    for class_id, display_name in enumerate((
        'non-hateful',
        'symbolization/exaggeration/generalization/attribution/distortion',
        'swearing/insult/defamation/dehumanization/threat of enmity/war/attack/murder/harm',
    ))
}

# Display names for the target-detection classifier, listed in class-id
# order: the name at position i is stored under the key 'LABEL_<i>'.
target_mapping = {
    f'LABEL_{class_id}': group_name
    for class_id, group_name in enumerate([
        'No-group',
        'Refugees',
        'Israel-Jews',
        'Greeks',
        'Armenian',
        'Alevi',
        'Kurdish',
        'Arabian',
        'LGBTI+',
        'Women',
        'Other groups',
    ])
}


# Hub repository holding the TURNA binary classifier checkpoint.
_T5_REPO_ID = "gokceuludogan/turna_tr_hateprint_w0.1_new_"

# Holds a single (model, tokenizer) tuple once the checkpoint has been loaded,
# so it is instantiated at most once per process instead of on every request.
_t5_loaded = []


def inference_t5(input_text, selected_model):
    """Run the TURNA classifier on ``input_text`` and return its raw logits.

    Args:
        input_text: Text to classify; passed directly to the tokenizer.
        selected_model: Accepted for interface compatibility but not used;
            the checkpoint is always ``_T5_REPO_ID``.

    Returns:
        The ``logits`` attribute of the model output. No label mapping is
        applied here.
    """
    if not _t5_loaded:
        model = T5ForClassification.from_pretrained(_T5_REPO_ID)
        tokenizer = AutoTokenizer.from_pretrained(_T5_REPO_ID)
        # Publish both objects in one step so a concurrent caller never sees a
        # half-initialised cache.
        _t5_loaded.append((model, tokenizer))

    model, tokenizer = _t5_loaded[0]
    encoded = tokenizer(input_text, return_tensors='pt')
    return model(**encoded).logits

    
# Functions for model-based tasks
def perform_binary_classification(input_text, selected_model):
    """Classify ``input_text`` as hateful or non-hateful with the chosen model.

    Args:
        input_text: Text to classify.
        selected_model: Model name chosen in the UI. Names containing
            ``'turna'`` are served by ``inference_t5``; any other name is
            loaded as ``gokceuludogan/<selected_model>`` through ``pipeline``.

    Returns:
        A label from ``binary_mapping``; ``'error'`` when the predicted class
        id has no entry there; or an explanatory message when no model was
        selected.
    """
    if selected_model is None:
        # Without this guard the pipeline would be asked to load the
        # non-existent repository 'gokceuludogan/None'.
        return 'error: no model selected'

    if 'turna' in selected_model:
        # inference_t5 returns raw logits. Reduce them to a class id so this
        # branch reports a readable label, like the pipeline branch, instead
        # of the textual dump of a tensor.
        # NOTE(review): assumes logits are (1, num_labels) and that class ids
        # follow the same order as binary_mapping; confirm against the
        # checkpoint configuration.
        logits = inference_t5(input_text, selected_model)
        predicted_label = f'LABEL_{logits.argmax(dim=-1).item()}'
    else:
        classifier = pipeline(model=f'gokceuludogan/{selected_model}')
        predicted_label = classifier(input_text)[0]['label']

    return binary_mapping.get(predicted_label, 'error')

def perform_categorization(input_text):
    """Return the hate-speech category predicted for ``input_text``.

    A ``pipeline`` is built for the
    ``gokceuludogan/berturk_tr_hateprint_cat_class_w0.1_b128`` checkpoint, the
    label of its first prediction is looked up in ``category_mapping``, and
    ``'error'`` is returned when the label is not in the mapping.
    """
    classifier = pipeline(model='gokceuludogan/berturk_tr_hateprint_cat_class_w0.1_b128')
    top_prediction = classifier(input_text)[0]
    raw_label = top_prediction['label']
    return category_mapping.get(raw_label, 'error')

# def perform_target_detection(input_text):
#     model = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_target')
#     return model(input_text)[0]['generated_text']

def perform_target_detection(input_text):
    """Return the targeted group predicted for ``input_text``.

    A ``pipeline`` is built for the
    ``gokceuludogan/berturk_tr_hateprint_target_class_w0.1`` checkpoint, the
    label of its first prediction is looked up in ``target_mapping``, and
    ``'error'`` is returned when the label is not in the mapping.
    """
    classifier = pipeline(model='gokceuludogan/berturk_tr_hateprint_target_class_w0.1')
    top_prediction = classifier(input_text)[0]
    raw_label = top_prediction['label']
    return target_mapping.get(raw_label, 'error')
    
def perform_multi_detection(input_text):
    """Return the text generated for ``input_text`` by the multi-task model.

    A ``pipeline`` is built for the
    ``gokceuludogan/turna_generation_tr_hateprint_multi`` checkpoint and the
    ``'generated_text'`` field of its first output is returned unchanged.
    """
    generator = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_multi')
    outputs = generator(input_text)
    first_output = outputs[0]
    return first_output['generated_text']

# Gradio interface: one tab per task, each wiring a text box and a button to
# the matching perform_* function, followed by the citation note.
with gr.Blocks(theme="abidlabs/Lime") as hate_speech_demo:

    # Main description
    with gr.Tab("About"):
        gr.Markdown(APP_DESCRIPTION)

    # Binary Classification Tab
    with gr.Tab("Binary Classification"):
        gr.Markdown("Analyze the hatefulness of a given text using selected models.")
        with gr.Column():
            model_choice_binary = gr.Radio(
                choices=[
                    "turna_tr_hateprint_w0.1_new_",
                    "berturk_tr_hateprint_w0.1_b128_v2",  # "berturk_tr_hateprint_w0.1",
                ],
                label="Select Model",
                # The default must be one of `choices`; the previous default
                # ("turna_tr_hateprint") was not, so no option was a valid
                # pre-selection.
                value="turna_tr_hateprint_w0.1_new_",
            )
            text_input_binary = gr.Textbox(label="Input Text")
            classify_button = gr.Button("Analyze")
            classification_output = gr.Textbox(label="Classification Result")
            classify_button.click(
                perform_binary_classification,
                inputs=[text_input_binary, model_choice_binary],
                outputs=classification_output,
            )

    # Hate Speech Categorization Tab
    with gr.Tab("Hate Speech Categorization"):
        gr.Markdown("Categorize the hate speech type in the provided text.")
        with gr.Column():
            text_input_category = gr.Textbox(label="Input Text")
            categorize_button = gr.Button("Categorize")
            categorization_output = gr.Textbox(label="Categorization Result")
            categorize_button.click(
                perform_categorization,
                inputs=[text_input_category],
                outputs=categorization_output,
            )

    # Target Detection Tab
    with gr.Tab("Target Detection"):
        gr.Markdown("Detect the targets of hate speech in the provided text.")
        with gr.Column():
            text_input_target = gr.Textbox(label="Input Text")
            target_button = gr.Button("Detect Targets")
            target_output = gr.Textbox(label="Target Detection Result")
            target_button.click(
                perform_target_detection,
                inputs=[text_input_target],
                outputs=target_output,
            )

    # Multi Detection Tab
    with gr.Tab("Multi Detection"):
        gr.Markdown("Detect hate speech, its category, and its targets in the text.")
        with gr.Column():
            text_input_multi = gr.Textbox(label="Input Text")
            multi_button = gr.Button("Detect All")
            multi_output = gr.Textbox(label="Multi Detection Result")
            multi_button.click(
                perform_multi_detection,
                inputs=[text_input_multi],
                outputs=multi_output,
            )

    # Citation Section (rendered below the tabs)
    gr.Markdown(APP_CITATION)

# Launch the application
hate_speech_demo.launch()