impresso-project
/

language-identifier

Token Classification

language-identification

Model card Files Files and versions

language-identifier / lang_detect.py

Gleb Vinarskis

added emas pipeline

ae8276c 6 months ago

959 Bytes

	from transformers import Pipeline


	class LangDetectionPipeline(Pipeline):
	    """Pipeline that reports the model's top language guess for a text.

	    The wrapped model is called directly on the raw input and must return
	    a ``(predictions, probabilities)`` pair; the first entry of the first
	    row of each is turned into a ``{"label", "confidence"}`` dictionary.
	    """

	    def _sanitize_parameters(self, **kwargs):
	        """Split call-time keyword arguments into per-stage parameters.

	        Only ``text`` is recognised and it is routed to ``preprocess``;
	        the forward and postprocess stages never receive extra parameters.

	        Returns:
	            A ``(preprocess, forward, postprocess)`` tuple of dicts.
	        """
	        preprocess_params = {"text": kwargs["text"]} if "text" in kwargs else {}
	        return preprocess_params, {}, {}

	    def preprocess(self, text, **kwargs):
	        """Hand ``text`` to the next stage unchanged."""
	        return text

	    def _forward(self, text, **kwargs):
	        """Run the model on ``text`` and return its two results as a tuple."""
	        # Unpacking enforces that the model yields exactly two values.
	        labels, scores = self.model(text)
	        return labels, scores

	    def postprocess(self, outputs, **kwargs):
	        """Build the result dictionary from the model's top prediction.

	        Args:
	            outputs: The ``(predictions, probabilities)`` pair produced by
	                ``_forward``; only element ``[0][0]`` of each is read.

	        Returns:
	            ``{"label": ..., "confidence": ...}`` where ``label`` has the
	            ``__label__`` marker stripped and ``confidence`` is a
	            percentage rounded to two decimal places.
	        """
	        labels, scores = outputs
	        # Strip the "__label__" marker so only the bare label text remains.
	        language = labels[0][0].replace("__label__", "")
	        # float() keeps the value JSON-serialisable whatever numeric type
	        # the model returned; it is then scaled to a percentage.
	        percent = round(float(scores[0][0]) * 100, 2)
	        return {"label": language, "confidence": percent}