Spaces:

jinzo94
/

xpoll

Sleeping

File size: 2,426 Bytes

0688f83
bc58cec
82d632c
7e5e60c
bc58cec
 
7e5e60c
 
 
 
 
 
 
 
 
 
 
 
bc58cec
0688f83
 
 
bc58cec
7e5e60c
0688f83
7e5e60c
 
 
 
 
 
 
 
 
 
 
 
 
 
bc58cec
7e5e60c
 
 
 
 
 
 
 
 
 
 
bc58cec
0688f83
bc58cec
0688f83
 
7e5e60c
0688f83
7e5e60c
 
 
 
5d06ed0
 
0688f83
bc58cec
0688f83

import gradio as gr
from transformers import pipeline

# Language identification: labels each input with a language code and a score
lang_classifier = pipeline(
    task="text-classification",
    model="papluca/xlm-roberta-base-language-detection",
)

# Machine translation: source/target languages are supplied per call
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
)

# Offensive-language classifier, applied to the (translated) English text
offensive_classifier = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
)

# Mapping from the ISO 639-1 labels emitted by the language detector to the
# NLLB-200 (FLORES-200) codes expected by the translator. Every language the
# detector can report needs an entry here; a missing entry means the text
# cannot be routed to the translator.
LANGUAGE_CODES = {
    "ar": "arb_Arab",
    "bg": "bul_Cyrl",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "fr": "fra_Latn",
    "hi": "hin_Deva",
    "it": "ita_Latn",
    "ja": "jpn_Jpan",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt": "por_Latn",
    "ru": "rus_Cyrl",
    "sw": "swh_Latn",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh": "zho_Hans",
}

def analyze_text(text):
    """Detect the language of *text*, translate it to English, and classify it.

    Args:
        text: The user-supplied string. ``None``, empty, and whitespace-only
            values are all treated as "no input".

    Returns:
        A ``(language_output, hate_output)`` tuple of dicts.

        ``language_output`` has the keys ``language``, ``confidence``,
        ``original_text`` and ``translated_text``. ``translated_text`` is the
        English translation, ``"Already in English"`` when no translation was
        needed, or a notice when the detected language has no entry in
        ``LANGUAGE_CODES``.

        ``hate_output`` has the keys ``label`` and ``score`` taken from the
        top result of the offensive-language classifier.

        When no text is provided both dicts are ``{"error": "No text provided"}``.
    """
    # The UI or an API caller may pass None; guard before calling .strip().
    if text is None or not text.strip():
        return {"error": "No text provided"}, {"error": "No text provided"}

    # Both classifiers are RoBERTa-style encoders with a 512-token window;
    # truncate explicitly so over-long input does not crash inference.
    max_tokens = 512

    # Detect language
    lang_result = lang_classifier(text, truncation=True, max_length=max_tokens)
    detected_language = lang_result[0]['label']
    language_confidence = lang_result[0]['score']

    # Convert detected language to NLLB-200 format (None when unmapped)
    detected_language_nllb = LANGUAGE_CODES.get(detected_language)

    if detected_language_nllb == "eng_Latn":
        # Nothing to translate; classify the input as-is.
        text_for_classifier = text
        translation_field = "Already in English"
    elif detected_language_nllb is None:
        # No translator code for this language. Do not pretend it is English:
        # classify the raw text as a best effort and say so in the output.
        text_for_classifier = text
        translation_field = "Translation unavailable for detected language"
    else:
        translation_result = translator(
            text, src_lang=detected_language_nllb, tgt_lang="eng_Latn"
        )
        text_for_classifier = translation_result[0]['translation_text']
        translation_field = text_for_classifier

    # Detect offensive speech using the English text where available
    hate_result = offensive_classifier(
        text_for_classifier, truncation=True, max_length=max_tokens
    )

    language_output = {
        "language": detected_language,
        "confidence": language_confidence,
        "original_text": text,
        "translated_text": translation_field,
    }

    hate_output = {
        "label": hate_result[0]['label'],
        "score": hate_result[0]['score'],
    }

    return language_output, hate_output

# Build the web UI: a single text input wired to analyze_text, whose two
# return values are rendered in the two JSON panels (in order).
text_input = gr.Textbox(label="Enter text")
result_panels = [
    gr.JSON(label="Language Detection & Translation"),
    gr.JSON(label="Hate Speech Detection"),
]
iface = gr.Interface(
    title="Detect language, translate, and check for offensive speech",
    description="Enter text...",
    fn=analyze_text,
    inputs=text_input,
    outputs=result_panels,
)

# Start the server on port 7860, bound to 0.0.0.0, with sharing enabled
iface.launch(share=True, server_port=7860, server_name="0.0.0.0")