"""Gradio app that showcases Scandinavian zero-shot text classification models.""" import gradio as gr from transformers import pipeline from luga import language as detect_language # Load the zero-shot classification pipeline classifier = pipeline( "zero-shot-classification", model="alexandrainst/scandi-nli-large" ) # Set the description for the interface DESCRIPTION = """Classify text in Danish, Swedish or Norwegian into categories, without any training data! Note that the models will most likely *not* work as well as a finetuned model on your specific data, but they can be used as a starting point for your own classification task ✨""" def classification(task: str, doc: str) -> str: """Classify text into categories. Args: task (str): Task to perform. doc (str): Text to classify. Returns: str: The predicted label. """ # Detect the language of the text language = detect_language(doc.replace('\n', ' ')).name # Define the confidence string based on the language if language == "sv" or language == "no": confidence_str = "konfidensnivå" else: confidence_str = "konfidensniveau" # If the task is sentiment, classify the text into positive, negative or neutral if task == "Sentiment classification": if language == "sv": hypothesis_template = "Detta exempel är {}." candidate_labels = ["positivt", "negativt", "neutralt"] elif language == "no": hypothesis_template = "Dette eksemplet er {}." candidate_labels = ["positivt", "negativt", "nøytralt"] else: hypothesis_template = "Dette eksempel er {}." candidate_labels = ["positivt", "negativt", "neutralt"] # Else if the task is topic, classify the text into a topic elif task == "News topic classification": if language == "sv": hypothesis_template = "Detta exempel handlar om {}." candidate_labels = [ "krig", "politik", "utbildning", "hälsa", "ekonomi", "mode", "sport", ] elif language == "no": hypothesis_template = "Dette eksemplet handler om {}." candidate_labels = [ "krig", "politikk", "utdanning", "helse", "økonomi", "mote", "sport", ] else: hypothesis_template = "Denne nyhedsartikel handler primært om {}." candidate_labels = [ "krig", "politik", "uddannelse", "sundhed", "økonomi", "mode", "sport", ] # Else if the task is spam detection, classify the text into spam or not spam elif task == "Spam detection": if language == "sv": hypothesis_template = "Det här e-postmeddelandet ser {}" candidate_labels = { "ut som ett skräppostmeddelande": "Spam", "inte ut som ett skräppostmeddelande": "Inte spam", } elif language == "no": hypothesis_template = "Denne e-posten ser {}" candidate_labels = { "ut som en spam-e-post": "Spam", "ikke ut som en spam-e-post": "Ikke spam", } else: hypothesis_template = "Denne e-mail ligner {}" candidate_labels = { "en spam e-mail": "Spam", "ikke en spam e-mail": "Ikke spam", } # Else the task is not supported, so raise an error else: raise ValueError(f"Task {task} not supported.") # If `candidate_labels` is a list then convert it to a dictionary, where the keys # are the entries in the list and the values are the keys capitalized if isinstance(candidate_labels, list): candidate_labels = {label: label.capitalize() for label in candidate_labels} # Run the classifier on the text result = classifier( doc, candidate_labels=list(candidate_labels.keys()), hypothesis_template=hypothesis_template, ) print(result) # Return the predicted label return ( f"{candidate_labels[result['labels'][0]]}\n" f"({confidence_str}: {result['scores'][0]:.0%})" ) # Create a dropdown menu for the task dropdown = gr.inputs.Dropdown( label="Task", choices=["Sentiment classification", "News topic classification", "Spam detection"], default="Sentiment classification", ) # Create the interface, where the function depends on the task chosen interface = gr.Interface( fn=classification, inputs=[dropdown, gr.inputs.Textbox(label="Text")], outputs=gr.outputs.Label(type="text"), title="Scandinavian zero-shot text classification", description=DESCRIPTION, ) # Run the app interface.launch()