Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Files Community

zero-shot-classification / app.py

saattrupdan

feat: Add topic classification and offensive speech detection

efd38a2 over 2 years ago

raw

history blame

4.62 kB

	"""Gradio app that showcases Scandinavian zero-shot text classification models."""

	import gradio as gr
	from transformers import pipeline
	from luga import language as detect_language


	# Build the shared zero-shot classification pipeline once, at import time, so
	# every request reuses the same loaded model
	classifier = pipeline(
	    task="zero-shot-classification",
	    model="alexandrainst/scandi-nli-large",
	)


	# Hypothesis template and candidate labels for every supported task, keyed first
	# by task name and then by detected language code. The "da" entry doubles as the
	# fallback for any language that is neither Swedish ("sv") nor Norwegian ("no").
	_TASK_PROMPTS = {
	    "Sentiment classification": {
	        "sv": ("Detta exempel är {}.", ["positivt", "negativt", "neutralt"]),
	        "no": ("Dette eksemplet er {}.", ["positivt", "negativt", "nøytralt"]),
	        "da": ("Dette eksempel er {}.", ["positivt", "negativt", "neutralt"]),
	    },
	    "News topic classification": {
	        "sv": (
	            "Detta exempel handlar om {}.",
	            [
	                "krig",
	                "regering",
	                "politik",
	                "utbildning",
	                "hälsa",
	                "miljö",
	                "ekonomi",
	                "affärer",
	                "mode",
	                "underhållning",
	                "sport",
	            ],
	        ),
	        "no": (
	            "Dette eksemplet handler om {}.",
	            [
	                "krig",
	                "myndighetene",
	                "politikk",
	                "utdanning",
	                "helse",
	                "miljø",
	                "økonomi",
	                "virksomhet",
	                "mote",
	                "underholdning",
	                "sport",
	            ],
	        ),
	        "da": (
	            "Denne nyhedsartikel handler primært om {}.",
	            [
	                "krig",
	                "regering",
	                "politik",
	                "uddannelse",
	                "sundhed",
	                "miljø",
	                "økonomi",
	                "forretning",
	                "mode",
	                "underholdning",
	                "sport",
	            ],
	        ),
	    },
	    "Offensive text detection": {
	        # The Swedish template uses the Swedish verb "är", matching the other
	        # Swedish templates in this table ("er" is Danish/Norwegian).
	        "sv": ("Detta exempel är {}.", ["stötande", "inte stötande"]),
	        "no": ("Dette eksemplet er {}.", ["støtende", "ikke støtende"]),
	        "da": ("Dette eksempel er {}.", ["anstødig tale", "ikke anstødig tale"]),
	    },
	}


	def classification(task: str, doc: str) -> str:
	    """Classify a text with the zero-shot model and format the top prediction.

	    The language of the text is detected first and used to pick a hypothesis
	    template and candidate labels for the requested task. Swedish ("sv") and
	    Norwegian ("no") have their own prompts; every other detected language falls
	    back to the Danish prompts.

	    Args:
	        task (str):
	            Task to perform. One of "Sentiment classification", "News topic
	            classification" or "Offensive text detection".
	        doc (str):
	            Text to classify.

	    Returns:
	        str:
	            The first label of the classifier result, capitalised, followed on a
	            new line by the localised word for "confidence level" and the first
	            score formatted as a whole percentage, in parentheses.

	    Raises:
	        ValueError:
	            If `task` is not one of the supported tasks.
	    """
	    # Detect the language of the text. Newlines are replaced by spaces first;
	    # presumably the detector expects single-line input (confirm against luga).
	    language = detect_language(doc.replace("\n", " ")).name

	    # Swedish and Norwegian share the same word for "confidence level"; everything
	    # else uses the Danish spelling
	    if language in ("sv", "no"):
	        confidence_str = "konfidensnivå"
	    else:
	        confidence_str = "konfidensniveau"

	    # Look up the prompts for the task, rejecting tasks that are not supported
	    prompts_by_language = _TASK_PROMPTS.get(task)
	    if prompts_by_language is None:
	        raise ValueError(f"Task {task} not supported.")

	    # Use the language-specific prompt if there is one, otherwise the Danish one
	    hypothesis_template, candidate_labels = prompts_by_language.get(
	        language, prompts_by_language["da"]
	    )

	    # Run the classifier on the text. The labels are copied so the shared table
	    # can never be modified through the list handed to the pipeline.
	    result = classifier(
	        doc,
	        candidate_labels=list(candidate_labels),
	        hypothesis_template=hypothesis_template,
	    )

	    # Echo the raw result to stdout, where it ends up in the app logs
	    print(result)

	    # Return the first (top) label together with its score
	    return (
	        f"{result['labels'][0].capitalize()}\n"
	        f"({confidence_str}: {result['scores'][0]:.0%})"
	    )

	# Task selector shown to the user; sentiment classification is preselected
	dropdown = gr.inputs.Dropdown(
	    choices=[
	        "Sentiment classification",
	        "News topic classification",
	        "Offensive text detection",
	    ],
	    default="Sentiment classification",
	    label="Task",
	)

	# Wire the task selector and a free-text box to the classification function and
	# show its return value as a label
	interface = gr.Interface(
	    title="Scandinavian zero-shot text classification",
	    description=(
	        "Classify text in Danish, Swedish or Norwegian into categories, "
	        "without any training data!"
	    ),
	    fn=classification,
	    inputs=[dropdown, gr.inputs.Textbox(label="Text")],
	    outputs=gr.outputs.Label(type="text"),
	)

	# Start serving the app
	interface.launch()