Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Files Community

zero-shot-classification / app.py

saattrupdan

docs: Clarify description

80b5399 over 2 years ago

raw

history blame

6.35 kB

	"""Gradio app that showcases Scandinavian zero-shot text classification models."""

	import gradio as gr
	from transformers import pipeline
	from luga import language as detect_language


	# Build the zero-shot classification pipeline once at import time, so that every
	# request handled by the app reuses the same loaded model
	classifier = pipeline(
	    task="zero-shot-classification",
	    model="alexandrainst/scandi-nli-large",
	)


	# User-facing description text, passed to the Gradio interface below as its
	# `description`. It tells the user which languages are supported, that the
	# classification is zero-shot (no finetuning), and that the demo runs on a CPU.
	DESCRIPTION = """Classify text in Danish, Swedish or Norwegian into categories, without
	finetuning on any training data!

	Note that the models will most likely not work as well as a finetuned model on your
	specific data, but they can be used as a starting point for your own classification
	task ✨


	Also, be patient, as this demo is running on a CPU!"""


	# Detected language codes that select the Norwegian prompts. Besides plain "no"
	# this covers Bokmål ("nb") and Nynorsk ("nn"), which would otherwise fall through
	# to the Danish prompts.
	_NORWEGIAN_CODES = frozenset({"no", "nb", "nn"})

	# Localised wording for "confidence level", shown next to the score.
	_CONFIDENCE_LABELS = {
	    "sv": "konfidensnivå",
	    "no": "konfidensnivå",
	    "da": "konfidensniveau",
	}

	# Prompt configuration: task name -> prompt language -> (hypothesis template,
	# candidate labels). Candidate labels are either a list, in which case the
	# capitalised label is shown to the user, or a dictionary mapping the phrase that
	# is inserted into the hypothesis template to the name shown to the user.
	_TASK_PROMPTS = {
	    "Sentiment classification": {
	        "sv": ("Detta exempel är {}.", ["positivt", "negativt", "neutralt"]),
	        "no": ("Dette eksemplet er {}.", ["positivt", "negativt", "nøytralt"]),
	        "da": ("Dette eksempel er {}.", ["positivt", "negativt", "neutralt"]),
	    },
	    "News topic classification": {
	        "sv": (
	            "Detta exempel handlar om {}.",
	            [
	                "krig",
	                "politik",
	                "utbildning",
	                "hälsa",
	                "ekonomi",
	                "mode",
	                "sport",
	            ],
	        ),
	        "no": (
	            "Dette eksemplet handler om {}.",
	            [
	                "krig",
	                "politikk",
	                "utdanning",
	                "helse",
	                "økonomi",
	                "mote",
	                "sport",
	            ],
	        ),
	        "da": (
	            "Denne nyhedsartikel handler primært om {}.",
	            [
	                "krig",
	                "politik",
	                "uddannelse",
	                "sundhed",
	                "økonomi",
	                "mode",
	                "sport",
	            ],
	        ),
	    },
	    "Spam detection": {
	        "sv": (
	            "Det här e-postmeddelandet ser {}.",
	            {
	                "ut som ett skräppostmeddelande": "Spam",
	                "inte ut som ett skräppostmeddelande": "Inte spam",
	            },
	        ),
	        "no": (
	            "Denne e-posten ser {}.",
	            {
	                "ut som en spam-e-post": "Spam",
	                "ikke ut som en spam-e-post": "Ikke spam",
	            },
	        ),
	        "da": (
	            "Denne e-mail ligner {}.",
	            {
	                "en spam e-mail": "Spam",
	                "ikke en spam e-mail": "Ikke spam",
	            },
	        ),
	    },
	    "Product feedback detection": {
	        "sv": (
	            "Den här kommentaren är {}.",
	            {
	                "en recension av en produkt": "Produktfeedback",
	                "inte en recension av en produkt": "Inte produktfeedback",
	            },
	        ),
	        "no": (
	            "Denne kommentaren er {}.",
	            {
	                "en anmeldelse av et produkt": "Produkttilbakemelding",
	                "ikke en anmeldelse av et produkt": "Ikke produkttilbakemelding",
	            },
	        ),
	        "da": (
	            "Denne kommentar er {}.",
	            {
	                "en anmeldelse af et produkt": "Produktfeedback",
	                "ikke en anmeldelse af et produkt": "Ikke produktfeedback",
	            },
	        ),
	    },
	}


	def _prompt_language(detected_code: str) -> str:
	    """Map a detected language code to the prompt language: "sv", "no" or "da"."""
	    if detected_code == "sv":
	        return "sv"
	    if detected_code in _NORWEGIAN_CODES:
	        return "no"
	    # Danish prompts are the fallback for every other detected language
	    return "da"


	def _display_names(candidate_labels) -> dict:
	    """Return a mapping from each candidate label to the name shown to the user."""
	    if isinstance(candidate_labels, dict):
	        return candidate_labels
	    return {label: label.capitalize() for label in candidate_labels}


	def classification(task: str, doc: str) -> str:
	    """Classify a text for the given task, using prompts in the detected language.

	    The language of the text is detected first. Swedish and Norwegian texts get
	    Swedish and Norwegian prompts, respectively, and every other detected language
	    falls back to the Danish prompts.

	    Args:
	        task (str):
	            Task to perform. Must be one of "Sentiment classification", "News topic
	            classification", "Spam detection" or "Product feedback detection".
	        doc (str):
	            Text to classify.

	    Returns:
	        str:
	            The display name of the top-ranked label, followed on a new line by
	            the localised confidence wording and the score as a whole percentage,
	            in parentheses.

	    Raises:
	        ValueError:
	            If `task` is not one of the supported tasks.
	    """
	    # Reject unsupported tasks up front, before any model is run
	    if task not in _TASK_PROMPTS:
	        raise ValueError(f"Task {task} not supported.")

	    # Detect the language of the text. Newlines are flattened to spaces first,
	    # presumably because the detector expects single-line input.
	    detected_code = detect_language(doc.replace("\n", " ")).name
	    language = _prompt_language(detected_code)

	    # Look up the prompt for this task and language
	    hypothesis_template, candidate_labels = _TASK_PROMPTS[task][language]
	    display_names = _display_names(candidate_labels)

	    # Run the classifier on the text
	    result = classifier(
	        doc,
	        candidate_labels=list(display_names),
	        hypothesis_template=hypothesis_template,
	    )

	    # Echo the raw pipeline output to stdout
	    print(result)

	    # The first entries of `labels` and `scores` are used as the prediction
	    top_label = result["labels"][0]
	    top_score = result["scores"][0]
	    return (
	        f"{display_names[top_label]}\n"
	        f"({_CONFIDENCE_LABELS[language]}: {top_score:.0%})"
	    )

	# Tasks offered in the UI; the first entry is preselected
	task_choices = [
	    "Sentiment classification",
	    "News topic classification",
	    "Spam detection",
	    "Product feedback detection",
	]

	# Dropdown menu used to pick the task
	dropdown = gr.inputs.Dropdown(
	    label="Task", choices=task_choices, default=task_choices[0]
	)

	# Text box for the text to classify, pre-filled with an example sentence
	input_textbox = gr.inputs.Textbox(
	    label="Text",
	    default="Jeg er helt vild med fodbolden 😊",
	)

	# Output component that displays the string returned by `classification`
	output_label = gr.outputs.Label(type="text")

	# Wire the inputs and the output to the classification function
	interface = gr.Interface(
	    fn=classification,
	    inputs=[dropdown, input_textbox],
	    outputs=output_label,
	    title="Scandinavian zero-shot text classification",
	    description=DESCRIPTION,
	)

	# Start the app
	interface.launch()