Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Community

zero-shot-classification / app.py

saattrupdan

docs: Add CPU disclaimer

a0f796f over 2 years ago

raw

history blame

5.14 kB

	"""Gradio app that showcases Scandinavian zero-shot text classification models."""

	import gradio as gr
	from transformers import pipeline
	from luga import language as detect_language


	# Build the zero-shot classification pipeline once, at import time, so that every
	# call to `classification` reuses the same loaded model
	classifier = pipeline(
	    task="zero-shot-classification",
	    model="alexandrainst/scandi-nli-large",
	)


	# Introductory text for the demo; it is handed to `gr.Interface` through its
	# `description` argument. It tells the user which languages are supported, that
	# zero-shot results are only a starting point compared to a finetuned model, and
	# that the demo runs on a CPU.
	DESCRIPTION = """Classify text in Danish, Swedish or Norwegian into categories, without
	any training data!

	Note that the models will most likely not work as well as a finetuned model on your
	specific data, but they can be used as a starting point for your own classification
	task ✨


	Also, be patient, as this demo is running on a CPU!"""


	# ISO 639-1 codes that are all served with the Norwegian prompts: generic Norwegian
	# ("no") plus its two written standards, Bokmål ("nb") and Nynorsk ("nn"). Without
	# this, text detected as "nn" or "nb" would silently get the Danish prompts.
	_NORWEGIAN_CODES = frozenset({"no", "nb", "nn"})

	# Language whose prompts are used when the detected language has none of its own
	_FALLBACK_LANGUAGE = "da"

	# Word for "confidence level" that is printed next to the score, per prompt language
	_CONFIDENCE_WORDS = {
	    "sv": "konfidensnivå",
	    "no": "konfidensnivå",
	    "da": "konfidensniveau",
	}

	# Prompt configuration: task -> prompt language -> (hypothesis template, labels).
	# `labels` is either a tuple of words, which are inserted into the template and
	# displayed capitalised, or a dict mapping the inserted phrase to the displayed label.
	_TASK_PROMPTS = {
	    "Sentiment classification": {
	        "sv": ("Detta exempel är {}.", ("positivt", "negativt", "neutralt")),
	        "no": ("Dette eksemplet er {}.", ("positivt", "negativt", "nøytralt")),
	        "da": ("Dette eksempel er {}.", ("positivt", "negativt", "neutralt")),
	    },
	    "News topic classification": {
	        "sv": (
	            "Detta exempel handlar om {}.",
	            ("krig", "politik", "utbildning", "hälsa", "ekonomi", "mode", "sport"),
	        ),
	        "no": (
	            "Dette eksemplet handler om {}.",
	            ("krig", "politikk", "utdanning", "helse", "økonomi", "mote", "sport"),
	        ),
	        "da": (
	            "Denne nyhedsartikel handler primært om {}.",
	            ("krig", "politik", "uddannelse", "sundhed", "økonomi", "mode", "sport"),
	        ),
	    },
	    "Spam detection": {
	        "sv": (
	            "Det här e-postmeddelandet ser {}",
	            {
	                "ut som ett skräppostmeddelande": "Spam",
	                "inte ut som ett skräppostmeddelande": "Inte spam",
	            },
	        ),
	        "no": (
	            "Denne e-posten ser {}",
	            {
	                "ut som en spam-e-post": "Spam",
	                "ikke ut som en spam-e-post": "Ikke spam",
	            },
	        ),
	        "da": (
	            "Denne e-mail ligner {}",
	            {
	                "en spam e-mail": "Spam",
	                "ikke en spam e-mail": "Ikke spam",
	            },
	        ),
	    },
	}


	def classification(task: str, doc: str) -> str:
	    """Classify text into categories.

	    The language of the text is detected first and used to pick a hypothesis
	    template and candidate labels in Swedish, Norwegian or Danish. Danish is used
	    for every language that is neither Swedish nor Norwegian.

	    Args:
	        task (str):
	            Task to perform. One of "Sentiment classification", "News topic
	            classification" and "Spam detection".
	        doc (str):
	            Text to classify.

	    Returns:
	        str:
	            The predicted label, followed on a new line by the confidence of the
	            prediction as a whole percentage in parentheses.

	    Raises:
	        ValueError:
	            If `task` is not one of the supported tasks.
	    """
	    # Reject unknown tasks up front, before running language detection or the model
	    prompts_by_language = _TASK_PROMPTS.get(task)
	    if prompts_by_language is None:
	        raise ValueError(f"Task {task} not supported.")

	    # Newlines are flattened to spaces before detection (presumably the detector
	    # expects single-line input)
	    detected = detect_language(doc.replace("\n", " ")).name

	    # Map the detected code onto one of the three prompt languages
	    if detected in _NORWEGIAN_CODES:
	        language = "no"
	    elif detected in prompts_by_language:
	        language = detected
	    else:
	        language = _FALLBACK_LANGUAGE

	    # Build the mapping from the phrase inserted in the hypothesis to the label that
	    # is shown to the user
	    hypothesis_template, labels = prompts_by_language[language]
	    if isinstance(labels, dict):
	        candidate_labels = labels
	    else:
	        candidate_labels = {label: label.capitalize() for label in labels}

	    # Run the classifier on the text
	    result = classifier(
	        doc,
	        candidate_labels=list(candidate_labels),
	        hypothesis_template=hypothesis_template,
	    )

	    # Echo the raw pipeline output to stdout
	    print(result)

	    # The first label and score of the result are reported as the prediction
	    # (assumes the pipeline lists labels best-first; see the pipeline docs)
	    return (
	        f"{candidate_labels[result['labels'][0]]}\n"
	        f"({_CONFIDENCE_WORDS[language]}: {result['scores'][0]:.0%})"
	    )

	# Task selector offering the three tasks that `classification` handles, with
	# sentiment classification selected by default
	dropdown = gr.inputs.Dropdown(
	    choices=[
	        "Sentiment classification",
	        "News topic classification",
	        "Spam detection",
	    ],
	    default="Sentiment classification",
	    label="Task",
	)

	# Assemble the Gradio interface: the chosen task and the entered text are passed to
	# `classification`, and the string it returns is displayed as the output label
	interface = gr.Interface(
	    title="Scandinavian zero-shot text classification",
	    description=DESCRIPTION,
	    fn=classification,
	    inputs=[dropdown, gr.inputs.Textbox(label="Text")],
	    outputs=gr.outputs.Label(type="text"),
	)

	# Start serving the app
	interface.launch()