File size: 4,620 Bytes
7420aa9
 
 
 
 
 
 
 
 
 
 
 
 
efd38a2
 
7420aa9
 
efd38a2
 
7420aa9
 
 
 
 
efd38a2
7420aa9
 
efd38a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7420aa9
 
 
 
 
 
efd38a2
 
7420aa9
efd38a2
 
 
 
7420aa9
efd38a2
 
 
 
 
 
7420aa9
efd38a2
7420aa9
efd38a2
 
7420aa9
efd38a2
 
7420aa9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""Gradio app that showcases Scandinavian zero-shot text classification models."""

import gradio as gr
from transformers import pipeline
from luga import language as detect_language


# Build the zero-shot classification pipeline once, at module import, so that
# every call to the classification function reuses the same pipeline object
classifier = pipeline(
    task="zero-shot-classification",
    model="alexandrainst/scandi-nli-large",
)


# Language code used whenever the detected language is neither Swedish ("sv") nor
# Norwegian ("no"); every other detection result gets the Danish prompts
_FALLBACK_LANGUAGE = "da"

# Word for "confidence level" shown in the result, keyed by language code
_CONFIDENCE_LABELS = {
    "sv": "konfidensnivå",
    "no": "konfidensnivå",
    "da": "konfidensniveau",
}

# Prompts for each supported task: task name -> language code ->
# (hypothesis template, candidate labels). The task names must match the choices
# offered in the UI dropdown.
_TASK_PROMPTS = {
    "Sentiment classification": {
        "sv": ("Detta exempel är {}.", ("positivt", "negativt", "neutralt")),
        "no": ("Dette eksemplet er {}.", ("positivt", "negativt", "nøytralt")),
        "da": ("Dette eksempel er {}.", ("positivt", "negativt", "neutralt")),
    },
    "News topic classification": {
        "sv": (
            "Detta exempel handlar om {}.",
            (
                "krig",
                "regering",
                "politik",
                "utbildning",
                "hälsa",
                "miljö",
                "ekonomi",
                "affärer",
                "mode",
                "underhållning",
                "sport",
            ),
        ),
        "no": (
            "Dette eksemplet handler om {}.",
            (
                "krig",
                "myndighetene",
                "politikk",
                "utdanning",
                "helse",
                "miljø",
                "økonomi",
                "virksomhet",
                "mote",
                "underholdning",
                "sport",
            ),
        ),
        "da": (
            "Denne nyhedsartikel handler primært om {}.",
            (
                "krig",
                "regering",
                "politik",
                "uddannelse",
                "sundhed",
                "miljø",
                "økonomi",
                "forretning",
                "mode",
                "underholdning",
                "sport",
            ),
        ),
    },
    "Offensive text detection": {
        # The Swedish template uses "är" (Swedish), not "er" (Danish/Norwegian),
        # consistent with the other Swedish templates
        "sv": ("Detta exempel är {}.", ("stötande", "inte stötande")),
        "no": ("Dette eksemplet er {}.", ("støtende", "ikke støtende")),
        "da": ("Dette eksempel er {}.", ("anstødig tale", "ikke anstødig tale")),
    },
}


def classification(task: str, doc: str) -> str:
    """Classify text into categories.

    The language of the text is detected first, and the hypothesis template and
    candidate labels are then chosen to match both the task and the language.
    Swedish ("sv") and Norwegian ("no") have dedicated prompts; any other detected
    language falls back to the Danish prompts.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification" or "Offensive text detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The predicted label, capitalised, followed on a new line by the
            confidence of that label as a whole percentage in parentheses.

    Raises:
        ValueError:
            If the task is not supported.
    """
    # Reject unsupported tasks up front, before doing any language detection or
    # model inference
    prompts_by_language = _TASK_PROMPTS.get(task)
    if prompts_by_language is None:
        raise ValueError(f"Task {task} not supported.")

    # Detect the language of the text. Newlines are replaced by spaces before
    # detection, so the detector always receives a single line.
    language = detect_language(doc.replace("\n", " ")).name
    if language not in prompts_by_language:
        language = _FALLBACK_LANGUAGE

    confidence_str = _CONFIDENCE_LABELS[language]
    hypothesis_template, candidate_labels = prompts_by_language[language]

    # Run the classifier on the text; a fresh list is passed so that the shared
    # prompt table can never be modified by the pipeline
    result = classifier(
        doc,
        candidate_labels=list(candidate_labels),
        hypothesis_template=hypothesis_template,
    )

    # Emit the raw pipeline output to stdout
    print(result)

    # Return the first label in the result together with its score
    return (
        f"{result['labels'][0].capitalize()}\n"
        f"({confidence_str}: {result['scores'][0]:.0%})"
    )

# Task selector for the UI. The choices are the exact task names that the
# classification function dispatches on; sentiment classification is preselected.
# NOTE(review): `gr.inputs` looks like Gradio's legacy component namespace -
# confirm against the pinned Gradio version before upgrading.
dropdown = gr.inputs.Dropdown(
    choices=[
        "Sentiment classification",
        "News topic classification",
        "Offensive text detection",
    ],
    default="Sentiment classification",
    label="Task",
)

# Assemble the app: the task dropdown and a free-text box are passed, in that
# order, as the two arguments of the classification function, and the string it
# returns is shown in a text label
interface = gr.Interface(
    title="Scandinavian zero-shot text classification",
    description=(
        "Classify text in Danish, Swedish or Norwegian into categories, "
        "without any training data!"
    ),
    fn=classification,
    inputs=[dropdown, gr.inputs.Textbox(label="Text")],
    outputs=gr.outputs.Label(type="text"),
)

# Start serving the app
interface.launch()