File size: 2,426 Bytes
0688f83
bc58cec
82d632c
7e5e60c
bc58cec
 
7e5e60c
 
 
 
 
 
 
 
 
 
 
 
bc58cec
0688f83
 
 
bc58cec
7e5e60c
0688f83
7e5e60c
 
 
 
 
 
 
 
 
 
 
 
 
 
bc58cec
7e5e60c
 
 
 
 
 
 
 
 
 
 
bc58cec
0688f83
bc58cec
0688f83
 
7e5e60c
0688f83
7e5e60c
 
 
 
5d06ed0
 
0688f83
bc58cec
0688f83
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from transformers import pipeline

# Language identification model; the label of its top prediction is what
# gets looked up in LANGUAGE_CODES.
lang_classifier = pipeline(
    task="text-classification",
    model="papluca/xlm-roberta-base-language-detection",
)

# NLLB-200 translation model; source and target languages are chosen per call
# through the src_lang / tgt_lang arguments.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
)

# Offensive-language classifier, run on the (possibly translated) text.
offensive_classifier = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
)

# Mapping from ISO 639-1 codes to NLLB-200 (FLORES-200) language codes.
# A language missing from this table cannot be handed to the NLLB translator
# as a source language, so the table is meant to cover every label the
# language detector can emit (20 languages per its model card).
LANGUAGE_CODES = {
    "ar": "arb_Arab",
    "bg": "bul_Cyrl",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "fr": "fra_Latn",
    "hi": "hin_Deva",
    "it": "ita_Latn",
    "ja": "jpn_Jpan",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt": "por_Latn",
    "ru": "rus_Cyrl",
    "sw": "swh_Latn",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh": "zho_Hans",
}

# Tokenizer settings forwarded to both text-classification pipelines so that
# over-long input is clipped instead of overflowing the encoder.
# NOTE(review): 512 assumes the usual RoBERTa-base input window - confirm.
_CLASSIFIER_KWARGS = {"truncation": True, "max_length": 512}


def analyze_text(text):
    """Detect the language of *text*, translate it to English, and score it.

    Args:
        text: Raw user input. ``None``, empty, and whitespace-only values are
            all treated as "no text".

    Returns:
        A ``(language_output, hate_output)`` tuple of dicts.
        ``language_output`` has the keys ``language``, ``confidence``,
        ``original_text`` and ``translated_text``; ``hate_output`` has
        ``label`` and ``score``. When no text is supplied, both dicts are
        ``{"error": "No text provided"}``.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if text is None or not text.strip():
        return {"error": "No text provided"}, {"error": "No text provided"}

    # Detect language (top prediction only).
    detection = lang_classifier(text, **_CLASSIFIER_KWARGS)[0]
    detected_language = detection['label']
    language_confidence = detection['score']

    # Convert the detected language to its NLLB-200 code; None means the
    # language has no entry in LANGUAGE_CODES.
    detected_language_nllb = LANGUAGE_CODES.get(detected_language)

    if detected_language_nllb == "eng_Latn":
        text_to_classify = text
        translation_field = "Already in English"
    elif detected_language_nllb is None:
        # No known source code for the translator: classify the original text
        # and say so, rather than mislabelling the input as English.
        text_to_classify = text
        translation_field = (
            f"Translation unavailable for detected language '{detected_language}'"
        )
    else:
        translation_result = translator(
            text, src_lang=detected_language_nllb, tgt_lang="eng_Latn"
        )
        text_to_classify = translation_result[0]['translation_text']
        translation_field = text_to_classify

    # Score the English (or untranslatable original) text for offensiveness.
    hate_result = offensive_classifier(text_to_classify, **_CLASSIFIER_KWARGS)

    language_output = {
        "language": detected_language,
        "confidence": language_confidence,
        "original_text": text,
        "translated_text": translation_field,
    }

    hate_output = {
        "label": hate_result[0]['label'],
        "score": hate_result[0]['score'],
    }

    return language_output, hate_output

# Build the UI components first, then wire them to analyze_text: a single
# text box in, and one JSON panel for each of the function's two return values.
text_input = gr.Textbox(label="Enter text")
result_panels = [
    gr.JSON(label="Language Detection & Translation"),
    gr.JSON(label="Hate Speech Detection"),
]

iface = gr.Interface(
    fn=analyze_text,
    inputs=text_input,
    outputs=result_panels,
    title="Detect language, translate, and check for offensive speech",
    description="Enter text...",
)

# Start the server: bound to 0.0.0.0 on port 7860, with a share link requested.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
)