"""Gradio app that showcases Scandinavian zero-shot text classification models.""" import gradio as gr from transformers import pipeline from luga import language as detect_language # Load the zero-shot classification pipeline classifier = pipeline( "zero-shot-classification", model="alexandrainst/scandi-nli-large" ) def classification(task: str, doc: str) -> str: """Classify text into categories. Args: task (str): Task to perform. doc (str): Text to classify. Returns: str: The predicted label. """ # Detect the language of the text language = detect_language(doc.replace('\n', ' ')).name # Define the confidence string based on the language if language == "sv" or language == "no": confidence_str = "konfidensnivå" else: confidence_str = "konfidensniveau" # If the task is sentiment, classify the text into positive, negative or neutral if task == "Sentiment classification": if language == "sv": hypothesis_template = "Detta exempel är {}." candidate_labels = ["positivt", "negativt", "neutralt"] elif language == "no": hypothesis_template = "Dette eksemplet er {}." candidate_labels = ["positivt", "negativt", "nøytralt"] else: hypothesis_template = "Dette eksempel er {}." candidate_labels = ["positivt", "negativt", "neutralt"] # Else if the task is topic, classify the text into a topic elif task == "News topic classification": if language == "sv": hypothesis_template = "Detta exempel handlar om {}." candidate_labels = [ "krig", "regering", "politik", "utbildning", "hälsa", "miljö", "ekonomi", "affärer", "mode", "underhållning", "sport", ] elif language == "no": hypothesis_template = "Dette eksemplet handler om {}." candidate_labels = [ "krig", "myndighetene", "politikk", "utdanning", "helse", "miljø", "økonomi", "virksomhet", "mote", "underholdning", "sport", ] else: hypothesis_template = "Denne nyhedsartikel handler primært om {}." candidate_labels = [ "krig", "regering", "politik", "uddannelse", "sundhed", "miljø", "økonomi", "forretning", "mode", "underholdning", "sport", ] # Else if the task is offensive text detection, classify the text into offensive # or not offensive elif task == "Offensive text detection": if language == "sv": hypothesis_template = "Detta exempel er {}." candidate_labels = ["stötande", "inte stötande"] elif language == "no": hypothesis_template = "Dette eksemplet er {}." candidate_labels = ["støtende", "ikke støtende"] else: hypothesis_template = "Dette eksempel er {}." candidate_labels = ["anstødig tale", "ikke anstødig tale"] # Else the task is not supported, so raise an error else: raise ValueError(f"Task {task} not supported.") # Run the classifier on the text result = classifier( doc, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template ) print(result) # Return the predicted label return ( f"{result['labels'][0].capitalize()}\n" f"({confidence_str}: {result['scores'][0]:.0%})" ) # Create a dropdown menu for the task dropdown = gr.inputs.Dropdown( label="Task", choices=["Sentiment classification", "News topic classification", "Offensive text detection"], default="Sentiment classification", ) # Create the interface, where the function depends on the task chosen interface = gr.Interface( fn=classification, inputs=[dropdown, gr.inputs.Textbox(label="Text")], outputs=gr.outputs.Label(type="text"), title="Scandinavian zero-shot text classification", description="Classify text in Danish, Swedish or Norwegian into categories, without any training data!", ) # Run the app interface.launch()