|
"""Gradio app that showcases Scandinavian zero-shot text classification models.""" |
|
|
|
import gradio as gr |
|
from transformers import pipeline |
|
from luga import language as detect_language |
|
|
|
|
|
|
|
# Build the zero-shot classification pipeline once at import time so that
# every call to `classification` reuses the same loaded model.
classifier = pipeline(
    task="zero-shot-classification",
    model="alexandrainst/scandi-nli-large",
)
|
|
|
|
|
def classification(task: str, doc: str) -> str:
    """Classify text into categories.

    The language of the text is detected first, and the hypothesis template
    and candidate labels are chosen to match it before the zero-shot
    classifier is run. Swedish ("sv") and Norwegian ("no") get their own
    prompts; any other detected language falls back to the Danish prompts.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification" or "Offensive text detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The first label returned by the classifier, capitalised, followed
            on a new line by its score as a percentage in parentheses.

    Raises:
        ValueError:
            If `task` is not one of the supported tasks.
    """
    # Newlines are flattened for language detection only (presumably the
    # detector expects single-line input -- TODO confirm); the classifier
    # below still receives the original text.
    language = detect_language(doc.replace("\n", " ")).name

    # Swedish and Norwegian share the same word for "confidence level";
    # everything else uses the Danish spelling.
    if language in ("sv", "no"):
        confidence_str = "konfidensnivå"
    else:
        confidence_str = "konfidensniveau"

    hypothesis_template, candidate_labels = _select_prompt(task, language)

    result = classifier(
        doc,
        candidate_labels=candidate_labels,
        hypothesis_template=hypothesis_template,
    )

    # Kept from the original: echoes the raw pipeline output to stdout.
    print(result)

    top_label = result["labels"][0].capitalize()
    top_score = result["scores"][0]
    return f"{top_label}\n({confidence_str}: {top_score:.0%})"


def _select_prompt(task: str, language: str) -> tuple:
    """Return the (hypothesis template, candidate labels) pair for a task.

    Args:
        task (str):
            Name of the task, as shown in the UI.
        language (str):
            Detected language code. "sv" and "no" select the Swedish and
            Norwegian prompts; anything else selects the Danish ones.

    Returns:
        tuple:
            A `(hypothesis_template, candidate_labels)` pair, where the
            template is a string containing `{}` and the labels are a list
            of strings.

    Raises:
        ValueError:
            If `task` is not one of the supported tasks.
    """
    prompts = {
        "Sentiment classification": {
            "sv": ("Detta exempel är {}.", ["positivt", "negativt", "neutralt"]),
            "no": ("Dette eksemplet er {}.", ["positivt", "negativt", "nøytralt"]),
            "da": ("Dette eksempel er {}.", ["positivt", "negativt", "neutralt"]),
        },
        "News topic classification": {
            "sv": (
                "Detta exempel handlar om {}.",
                [
                    "krig",
                    "regering",
                    "politik",
                    "utbildning",
                    "hälsa",
                    "miljö",
                    "ekonomi",
                    "affärer",
                    "mode",
                    "underhållning",
                    "sport",
                ],
            ),
            "no": (
                "Dette eksemplet handler om {}.",
                [
                    "krig",
                    "myndighetene",
                    "politikk",
                    "utdanning",
                    "helse",
                    "miljø",
                    "økonomi",
                    "virksomhet",
                    "mote",
                    "underholdning",
                    "sport",
                ],
            ),
            "da": (
                "Denne nyhedsartikel handler primært om {}.",
                [
                    "krig",
                    "regering",
                    "politik",
                    "uddannelse",
                    "sundhed",
                    "miljø",
                    "økonomi",
                    "forretning",
                    "mode",
                    "underholdning",
                    "sport",
                ],
            ),
        },
        "Offensive text detection": {
            # Fixed: the Swedish template previously used the Danish/Norwegian
            # verb "er" instead of Swedish "är".
            "sv": ("Detta exempel är {}.", ["stötande", "inte stötande"]),
            "no": ("Dette eksemplet er {}.", ["støtende", "ikke støtende"]),
            "da": ("Dette eksempel er {}.", ["anstødig tale", "ikke anstødig tale"]),
        },
    }

    if task not in prompts:
        raise ValueError(f"Task {task} not supported.")

    by_language = prompts[task]
    # Any language other than Swedish or Norwegian uses the Danish prompt.
    return by_language.get(language, by_language["da"])
|
|
|
|
|
# Task selector for the UI. The choice strings are the exact task names that
# `classification` compares against, so they must stay in sync with it.
dropdown = gr.inputs.Dropdown(
    choices=[
        "Sentiment classification",
        "News topic classification",
        "Offensive text detection",
    ],
    default="Sentiment classification",
    label="Task",
)
|
|
|
|
|
# Wire the classifier into a Gradio UI: the task dropdown and a free-text box
# are passed to `classification` in that order, and its string result is shown
# in a label component.
interface = gr.Interface(
    fn=classification,
    inputs=[
        dropdown,
        gr.inputs.Textbox(label="Text"),
    ],
    outputs=gr.outputs.Label(type="text"),
    title="Scandinavian zero-shot text classification",
    description=(
        "Classify text in Danish, Swedish or Norwegian into categories, "
        "without any training data!"
    ),
)

# Start the app as soon as the module is executed.
interface.launch()
|
|