Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Files Community

saattrupdan committed on Dec 1, 2022

Commit

efd38a2

1 Parent(s): 99d1a14

feat: Add topic classification and offensive speech detection

Browse files

Files changed (1) hide show

app.py +106 -21

app.py CHANGED Viewed

@@ -11,47 +11,132 @@ classifier = pipeline(
 )
def sentiment_classification(doc: str) -> str:
    """Classify text into sentiment categories.

    The language of the text is detected first, and the hypothesis template and
    candidate labels handed to the zero-shot classifier are chosen to match it.
    Swedish ("sv") and Norwegian ("no") have their own settings; every other
    detected language uses the Danish settings, so the classifier call never sees
    unassigned variables.

    Args:
        doc (str):
            Text to classify.

    Returns:
        str:
            The predicted sentiment category.
    """
    # Detect the language of the text
    language = detect_language(doc).name

    # Get hypothesis template and candidate labels depending on the language
    if language == "sv":
        hypothesis_template = "Detta exempel är {}."
        candidate_labels = ["positivt", "negativt", "neutralt"]
    elif language == "no":
        hypothesis_template = "Dette eksemplet er {}."
        candidate_labels = ["positivt", "negativt", "nøytralt"]
    else:
        # Danish, and the fallback for any other detected language
        hypothesis_template = "Dette eksempel er {}."
        candidate_labels = ["positivt", "negativt", "neutralt"]

    # Run the classifier on the text
    result = classifier(
        doc, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template
    )

    # Return the predicted label, taken as the first entry of the result's labels
    return result["labels"][0]
# Build the Gradio interface: a five-line text box feeds the sentiment classifier
# and the predicted category is shown as a text label
interface = gr.Interface(
    title="Scandinavian Zero-Shot Text Classification",
    description="Classify text into sentiment categories.",
    fn=sentiment_classification,
    inputs=gr.inputs.Textbox(lines=5, label="Text"),
    outputs=gr.outputs.Label(type="text"),
)
 # Run the app

 )
# Zero-shot settings for every supported task. Each task maps a language code to a
# (hypothesis template, candidate labels) pair. The "da" entry is also used for any
# detected language other than Swedish ("sv") or Norwegian ("no").
_TASK_SETTINGS = {
    "Sentiment classification": {
        "sv": ("Detta exempel är {}.", ("positivt", "negativt", "neutralt")),
        "no": ("Dette eksemplet er {}.", ("positivt", "negativt", "nøytralt")),
        "da": ("Dette eksempel er {}.", ("positivt", "negativt", "neutralt")),
    },
    "News topic classification": {
        "sv": (
            "Detta exempel handlar om {}.",
            (
                "krig",
                "regering",
                "politik",
                "utbildning",
                "hälsa",
                "miljö",
                "ekonomi",
                "affärer",
                "mode",
                "underhållning",
                "sport",
            ),
        ),
        "no": (
            "Dette eksemplet handler om {}.",
            (
                "krig",
                "myndighetene",
                "politikk",
                "utdanning",
                "helse",
                "miljø",
                "økonomi",
                "virksomhet",
                "mote",
                "underholdning",
                "sport",
            ),
        ),
        "da": (
            "Denne nyhedsartikel handler primært om {}.",
            (
                "krig",
                "regering",
                "politik",
                "uddannelse",
                "sundhed",
                "miljø",
                "økonomi",
                "forretning",
                "mode",
                "underholdning",
                "sport",
            ),
        ),
    },
    "Offensive text detection": {
        # The Swedish template uses "är", matching the Swedish sentiment template
        "sv": ("Detta exempel är {}.", ("stötande", "inte stötande")),
        "no": ("Dette eksemplet er {}.", ("støtende", "ikke støtende")),
        "da": ("Dette eksempel er {}.", ("anstødig tale", "ikke anstødig tale")),
    },
}


def classification(task: str, doc: str) -> str:
    """Classify text into categories.

    The language of the text is detected, the hypothesis template and candidate
    labels for the requested task are looked up for that language, and the
    zero-shot classifier is run with them.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification" or "Offensive text detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The predicted label, capitalised, followed on a new line by the
            confidence of that prediction as a whole percentage in parentheses.

    Raises:
        ValueError:
            If the task is not one of the supported tasks.
    """
    # Detect the language of the text, with newlines flattened to spaces
    # (presumably the detector expects single-line input)
    language = detect_language(doc.replace('\n', ' ')).name

    # Define the confidence string based on the language
    if language in ("sv", "no"):
        confidence_str = "konfidensnivå"
    else:
        confidence_str = "konfidensniveau"

    # Look up the settings for the task; an unknown task is an error
    settings_by_language = _TASK_SETTINGS.get(task)
    if settings_by_language is None:
        raise ValueError(f"Task {task} not supported.")

    # Pick the settings for the detected language, falling back to Danish
    hypothesis_template, labels = settings_by_language.get(
        language, settings_by_language["da"]
    )

    # Hand the classifier a fresh list so the shared table is never touched
    candidate_labels = list(labels)

    # Run the classifier on the text
    result = classifier(
        doc, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template
    )

    # Write the raw classifier output to stdout
    print(result)

    # Return the first label together with its score, formatted as a percentage
    return (
        f"{result['labels'][0].capitalize()}\n"
        f"({confidence_str}: {result['scores'][0]:.0%})"
    )
# Dropdown menu used to pick the task, with sentiment classification preselected
dropdown = gr.inputs.Dropdown(
    choices=[
        "Sentiment classification",
        "News topic classification",
        "Offensive text detection",
    ],
    default="Sentiment classification",
    label="Task",
)

# Build the interface: the chosen task and the entered text are both passed to the
# classification function, and its returned string is shown as a text label
interface = gr.Interface(
    title="Scandinavian zero-shot text classification",
    description=(
        "Classify text in Danish, Swedish or Norwegian into categories, "
        "without any training data!"
    ),
    fn=classification,
    inputs=[dropdown, gr.inputs.Textbox(label="Text")],
    outputs=gr.outputs.Label(type="text"),
)
 # Run the app