# Hugging Face Space: alexandrainst/zero-shot-classification (status: Running)
# File size: 4,620 Bytes

"""Gradio app that showcases Scandinavian zero-shot text classification models."""

import gradio as gr
from transformers import pipeline
from luga import language as detect_language


# Identifier of the model used for zero-shot classification
_MODEL_ID = "alexandrainst/scandi-nli-large"

# Build the zero-shot classification pipeline once, when the module is loaded,
# so that every call to the app reuses the same pipeline object
classifier = pipeline(task="zero-shot-classification", model=_MODEL_ID)


def classification(task: str, doc: str) -> str:
    """Classify a text with the zero-shot classifier for the chosen task.

    The language of the text is detected first, and the hypothesis template, the
    candidate labels and the wording of the confidence line are chosen to match
    it. Swedish ("sv") and Norwegian ("no") have dedicated wording; any other
    detected language falls back to the Danish wording.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification" or "Offensive text detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The first label returned by the classifier (the predicted label),
            capitalised, followed on a new line by its score formatted as a
            whole percentage in parentheses.

    Raises:
        ValueError:
            If `task` is not one of the supported tasks.
    """
    # Detect the language of the text. Newlines are replaced by spaces before
    # detection (presumably the detector expects single-line input).
    language = detect_language(doc.replace('\n', ' ')).name

    # Define the confidence string based on the language; Swedish and Norwegian
    # share the same word, everything else uses the Danish one
    if language in ("sv", "no"):
        confidence_str = "konfidensnivå"
    else:
        confidence_str = "konfidensniveau"

    # If the task is sentiment, classify the text into positive, negative or neutral
    if task == "Sentiment classification":
        if language == "sv":
            hypothesis_template = "Detta exempel är {}."
            candidate_labels = ["positivt", "negativt", "neutralt"]
        elif language == "no":
            hypothesis_template = "Dette eksemplet er {}."
            candidate_labels = ["positivt", "negativt", "nøytralt"]
        else:
            hypothesis_template = "Dette eksempel er {}."
            candidate_labels = ["positivt", "negativt", "neutralt"]

    # Else if the task is topic, classify the text into a topic
    elif task == "News topic classification":
        if language == "sv":
            hypothesis_template = "Detta exempel handlar om {}."
            candidate_labels = [
                "krig",
                "regering",
                "politik",
                "utbildning",
                "hälsa",
                "miljö",
                "ekonomi",
                "affärer",
                "mode",
                "underhållning",
                "sport",
            ]
        elif language == "no":
            hypothesis_template = "Dette eksemplet handler om {}."
            candidate_labels = [
                "krig",
                "myndighetene",
                "politikk",
                "utdanning",
                "helse",
                "miljø",
                "økonomi",
                "virksomhet",
                "mote",
                "underholdning",
                "sport",
            ]
        else:
            hypothesis_template = "Denne nyhedsartikel handler primært om {}."
            candidate_labels = [
                "krig",
                "regering",
                "politik",
                "uddannelse",
                "sundhed",
                "miljø",
                "økonomi",
                "forretning",
                "mode",
                "underholdning",
                "sport",
            ]

    # Else if the task is offensive text detection, classify the text into offensive
    # or not offensive
    elif task == "Offensive text detection":
        if language == "sv":
            # The Swedish copula is "är" ("er" is Danish/Norwegian), matching the
            # Swedish sentiment template above
            hypothesis_template = "Detta exempel är {}."
            candidate_labels = ["stötande", "inte stötande"]
        elif language == "no":
            hypothesis_template = "Dette eksemplet er {}."
            candidate_labels = ["støtende", "ikke støtende"]
        else:
            hypothesis_template = "Dette eksempel er {}."
            candidate_labels = ["anstødig tale", "ikke anstødig tale"]

    # Else the task is not supported, so raise an error
    else:
        raise ValueError(f"Task {task} not supported.")

    # Run the classifier on the text
    result = classifier(
        doc, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template
    )

    # Write the raw classifier output to stdout
    print(result)

    # Return the predicted label along with its confidence
    return (
        f"{result['labels'][0].capitalize()}\n"
        f"({confidence_str}: {result['scores'][0]:.0%})"
    )

# Create a dropdown menu for the task. The top-level component classes are used
# because the `gr.inputs`/`gr.outputs` namespaces are deprecated, and the initial
# selection is passed as `value`, which replaces the old `default` argument.
dropdown = gr.Dropdown(
    label="Task",
    choices=[
        "Sentiment classification",
        "News topic classification",
        "Offensive text detection",
    ],
    value="Sentiment classification",
)

# Create the interface, where the function depends on the task chosen. The
# function returns a plain string, which the label component displays as-is.
interface = gr.Interface(
    fn=classification,
    inputs=[dropdown, gr.Textbox(label="Text")],
    outputs=gr.Label(),
    title="Scandinavian zero-shot text classification",
    description=(
        "Classify text in Danish, Swedish or Norwegian into categories, without any "
        "training data!"
    ),
)

# Run the app
interface.launch()