saattrupdan committed on
Commit
efd38a2
·
1 Parent(s): 99d1a14

feat: Add topic classification and offensive speech detection

Browse files
Files changed (1) hide show
  1. app.py +106 -21
app.py CHANGED
@@ -11,47 +11,132 @@ classifier = pipeline(
11
  )
12
 
13
 
14
- def sentiment_classification(doc: str) -> str:
15
- """Classify text into sentiment categories.
16
 
17
  Args:
 
 
18
  doc (str):
19
  Text to classify.
20
 
21
  Returns:
22
  str:
23
- The predicted sentiment category.
24
  """
25
  # Detect the language of the text
26
- language = detect_language(doc).name
27
-
28
- # Get hypothesis template and candidate labels depending on the language
29
- if language == "da":
30
- hypothesis_template = "Dette eksempel er {}."
31
- candidate_labels = ["positivt", "negativt", "neutralt"]
32
- elif language == "sv":
33
- hypothesis_template = "Detta exempel är {}."
34
- candidate_labels = ["positivt", "negativt", "neutralt"]
35
- elif language == "no":
36
- hypothesis_template = "Dette eksemplet er {}."
37
- candidate_labels = ["positivt", "negativt", "nøytralt"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Run the classifier on the text
40
  result = classifier(
41
  doc, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template
42
  )
43
 
 
 
44
  # Return the predicted label
45
- return result["labels"][0]
 
 
 
46
 
 
 
 
 
 
 
47
 
48
- # Create the Gradio interface
49
  interface = gr.Interface(
50
- fn=sentiment_classification,
51
- inputs=gr.inputs.Textbox(lines=5, label="Text"),
52
  outputs=gr.outputs.Label(type="text"),
53
- title="Scandinavian Zero-Shot Text Classification",
54
- description="Classify text into sentiment categories.",
55
  )
56
 
57
  # Run the app
 
11
  )
12
 
13
 
14
# Language whose prompts are used when the detected language is neither Swedish
# ("sv") nor Norwegian ("no")
_FALLBACK_LANGUAGE = "da"

# Wording of "confidence level" per language, used in the returned string
_CONFIDENCE_STRINGS = {
    "sv": "konfidensnivå",
    "no": "konfidensnivå",
    "da": "konfidensniveau",
}

# For every supported task: language -> (hypothesis template, candidate labels).
# Every task must provide an entry for `_FALLBACK_LANGUAGE`.
_TASK_CONFIGS = {
    "Sentiment classification": {
        "sv": ("Detta exempel är {}.", ("positivt", "negativt", "neutralt")),
        "no": ("Dette eksemplet er {}.", ("positivt", "negativt", "nøytralt")),
        "da": ("Dette eksempel er {}.", ("positivt", "negativt", "neutralt")),
    },
    "News topic classification": {
        "sv": (
            "Detta exempel handlar om {}.",
            (
                "krig",
                "regering",
                "politik",
                "utbildning",
                "hälsa",
                "miljö",
                "ekonomi",
                "affärer",
                "mode",
                "underhållning",
                "sport",
            ),
        ),
        "no": (
            "Dette eksemplet handler om {}.",
            (
                "krig",
                "myndighetene",
                "politikk",
                "utdanning",
                "helse",
                "miljø",
                "økonomi",
                "virksomhet",
                "mote",
                "underholdning",
                "sport",
            ),
        ),
        "da": (
            "Denne nyhedsartikel handler primært om {}.",
            (
                "krig",
                "regering",
                "politik",
                "uddannelse",
                "sundhed",
                "miljø",
                "økonomi",
                "forretning",
                "mode",
                "underholdning",
                "sport",
            ),
        ),
    },
    "Offensive text detection": {
        # The Swedish template uses the Swedish verb "är", matching the Swedish
        # sentiment template ("er" is the Danish/Norwegian form)
        "sv": ("Detta exempel är {}.", ("stötande", "inte stötande")),
        "no": ("Dette eksemplet er {}.", ("støtende", "ikke støtende")),
        "da": ("Dette eksempel er {}.", ("anstødig tale", "ikke anstødig tale")),
    },
}


def classification(task: str, doc: str) -> str:
    """Classify text into categories.

    Args:
        task (str):
            Task to perform. One of "Sentiment classification", "News topic
            classification" or "Offensive text detection".
        doc (str):
            Text to classify.

    Returns:
        str:
            The first label returned by the classifier, capitalised, followed on
            a new line by its score as a whole percentage in parentheses.

    Raises:
        ValueError:
            If the task is not supported.
    """
    # Reject unsupported tasks up front, before any detection or model call
    if task not in _TASK_CONFIGS:
        raise ValueError(f"Task {task} not supported.")

    # Detect the language of the text. Newlines are replaced by spaces first
    # (presumably the detector expects single-line input -- TODO confirm).
    language = detect_language(doc.replace("\n", " ")).name

    # Pick the prompt for the detected language, falling back to Danish for any
    # language other than Swedish or Norwegian
    prompts = _TASK_CONFIGS[task]
    hypothesis_template, labels = prompts.get(language, prompts[_FALLBACK_LANGUAGE])
    confidence_str = _CONFIDENCE_STRINGS.get(
        language, _CONFIDENCE_STRINGS[_FALLBACK_LANGUAGE]
    )

    # Run the classifier on the text; a fresh list is passed so the shared
    # configuration table can never be modified through it
    result = classifier(
        doc,
        candidate_labels=list(labels),
        hypothesis_template=hypothesis_template,
    )

    # Return the predicted label together with its confidence
    return (
        f"{result['labels'][0].capitalize()}\n"
        f"({confidence_str}: {result['scores'][0]:.0%})"
    )
125
 
126
# Tasks offered to the user; the first one is preselected in the dropdown
task_choices = [
    "Sentiment classification",
    "News topic classification",
    "Offensive text detection",
]
dropdown = gr.inputs.Dropdown(
    choices=task_choices,
    default=task_choices[0],
    label="Task",
)

# Free-text field holding the document to classify
text_input = gr.inputs.Textbox(label="Text")

# Build the interface around `classification`; the inputs are listed in the same
# order as the function's (task, doc) parameters
interface = gr.Interface(
    fn=classification,
    inputs=[dropdown, text_input],
    outputs=gr.outputs.Label(type="text"),
    title="Scandinavian zero-shot text classification",
    description="Classify text in Danish, Swedish or Norwegian into categories, without any training data!",
)
141
 
142
  # Run the app