# Residue from the web file viewer this source was copied from (not program code):
#   saattrupdan's picture
#   docs: Clarify description
#   80b5399
#   raw
#   history blame
#   6.35 kB
"""Gradio app that showcases Scandinavian zero-shot text classification models."""
import gradio as gr
from transformers import pipeline
from luga import language as detect_language
# Build the zero-shot classification pipeline once, at module load, so that every
# call to the app reuses the same pipeline object
classifier = pipeline(
    task="zero-shot-classification",
    model="alexandrainst/scandi-nli-large",
)
# Text passed to the interface as its description
DESCRIPTION = (
    "Classify text in Danish, Swedish or Norwegian into categories, without\n"
    "finetuning on any training data!\n"
    "Note that the models will most likely *not* work as well as a finetuned model on your\n"
    "specific data, but they can be used as a starting point for your own classification\n"
    "task ✨\n"
    "Also, be patient, as this demo is running on a CPU!"
)
def classification(task: str, doc: str) -> str:
    """Run zero-shot classification of a document for the chosen task.

    The language of the document is detected first. It selects the hypothesis
    template, the candidate labels and the wording of the confidence string. Any
    language other than Swedish ("sv") or Norwegian ("no") gets the Danish setup.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification", "Spam detection" or "Product feedback detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The display name of the first label returned by the classifier,
            followed on a new line by its score formatted as a percentage.

    Raises:
        ValueError:
            If `task` is not one of the supported tasks.
    """
    # Detect the language on a single-line version of the text, and treat everything
    # that is neither Swedish nor Norwegian as Danish
    lang = detect_language(doc.replace("\n", " ")).name
    if lang not in ("sv", "no"):
        lang = "da"

    # Only the Danish spelling of "confidence level" differs
    confidence_word = "konfidensniveau" if lang == "da" else "konfidensnivå"

    # For every task, map the language to a (hypothesis template, labels) pair. The
    # labels are either a tuple of phrases, which are displayed capitalised, or a
    # dictionary mapping the phrase inserted into the template to the displayed name
    setups = {
        "Sentiment classification": {
            "sv": ("Detta exempel är {}.", ("positivt", "negativt", "neutralt")),
            "no": ("Dette eksemplet er {}.", ("positivt", "negativt", "nøytralt")),
            "da": ("Dette eksempel er {}.", ("positivt", "negativt", "neutralt")),
        },
        "News topic classification": {
            "sv": (
                "Detta exempel handlar om {}.",
                ("krig", "politik", "utbildning", "hälsa", "ekonomi", "mode", "sport"),
            ),
            "no": (
                "Dette eksemplet handler om {}.",
                ("krig", "politikk", "utdanning", "helse", "økonomi", "mote", "sport"),
            ),
            "da": (
                "Denne nyhedsartikel handler primært om {}.",
                ("krig", "politik", "uddannelse", "sundhed", "økonomi", "mode", "sport"),
            ),
        },
        "Spam detection": {
            "sv": (
                "Det här e-postmeddelandet ser {}.",
                {
                    "ut som ett skräppostmeddelande": "Spam",
                    "inte ut som ett skräppostmeddelande": "Inte spam",
                },
            ),
            "no": (
                "Denne e-posten ser {}.",
                {
                    "ut som en spam-e-post": "Spam",
                    "ikke ut som en spam-e-post": "Ikke spam",
                },
            ),
            "da": (
                "Denne e-mail ligner {}.",
                {
                    "en spam e-mail": "Spam",
                    "ikke en spam e-mail": "Ikke spam",
                },
            ),
        },
        "Product feedback detection": {
            "sv": (
                "Den här kommentaren är {}.",
                {
                    "en recension av en produkt": "Produktfeedback",
                    "inte en recension av en produkt": "Inte produktfeedback",
                },
            ),
            "no": (
                "Denne kommentaren er {}.",
                {
                    "en anmeldelse av et produkt": "Produkttilbakemelding",
                    "ikke en anmeldelse av et produkt": "Ikke produkttilbakemelding",
                },
            ),
            "da": (
                "Denne kommentar er {}.",
                {
                    "en anmeldelse af et produkt": "Produktfeedback",
                    "ikke en anmeldelse af et produkt": "Ikke produktfeedback",
                },
            ),
        },
    }

    # Unknown tasks are rejected
    if task not in setups:
        raise ValueError(f"Task {task} not supported.")
    template, labels = setups[task][lang]

    # Plain label sequences are displayed as their capitalised selves
    if not isinstance(labels, dict):
        labels = {phrase: phrase.capitalize() for phrase in labels}

    # Run the classifier on the text with the phrases as candidate labels
    result = classifier(
        doc,
        candidate_labels=list(labels),
        hypothesis_template=template,
    )
    print(result)

    # Report the first label together with its score
    top_phrase = result["labels"][0]
    top_score = result["scores"][0]
    return f"{labels[top_phrase]}\n({confidence_word}: {top_score:.0%})"
# Task selector; the choices are the task names that `classification` accepts
# NOTE(review): `gr.inputs` and `gr.outputs` appear to be Gradio's legacy component
# namespaces - confirm that the deployed Gradio version still provides them
dropdown = gr.inputs.Dropdown(
    choices=[
        "Sentiment classification",
        "News topic classification",
        "Spam detection",
        "Product feedback detection",
    ],
    default="Sentiment classification",
    label="Task",
)

# Free-text input, pre-filled with an example sentence
input_textbox = gr.inputs.Textbox(
    default="Jeg er helt vild med fodbolden 😊",
    label="Text",
)

# Wire the two inputs to `classification` and show the string it returns as a label
interface = gr.Interface(
    title="Scandinavian zero-shot text classification",
    description=DESCRIPTION,
    fn=classification,
    inputs=[dropdown, input_textbox],
    outputs=gr.outputs.Label(type="text"),
)

# Start the app
interface.launch()