langident
custom_code
impresso-langident / lang_detect.py
Gleb Vinarskis
added emas pipeline
ae8276c
raw
history blame
959 Bytes
from transformers import Pipeline
class LangDetectionPipeline(Pipeline):
    """Pipeline that runs ``self.model`` on a text and reports its top label.

    The result is a dictionary holding the first label returned by the
    model (without the ``__label__`` marker) and the matching score
    expressed as a percentage rounded to two decimals.
    """

    def _sanitize_parameters(self, **kwargs):
        """Split call-time kwargs into preprocess/forward/postprocess kwargs.

        Only the ``text`` keyword is recognised; it is routed to
        ``preprocess``. The forward and postprocess steps take no options.

        NOTE(review): ``preprocess`` also names its first parameter
        ``text``; if the base class hands the input over positionally, a
        ``text`` kwarg forwarded here may collide with it -- confirm.
        """
        recognised = ("text",)
        preprocess_params = {
            name: kwargs[name] for name in recognised if name in kwargs
        }
        return preprocess_params, {}, {}

    def preprocess(self, text, **kwargs):
        """Return ``text`` untouched; the model consumes the input as given."""
        return text

    def _forward(self, text, **kwargs):
        """Call the model on ``text`` and return ``(predictions, probabilities)``."""
        # Unpacking enforces that the model yields exactly two values.
        labels, scores = self.model(text)
        return labels, scores

    def postprocess(self, outputs, **kwargs):
        """Turn the model output into a JSON-compatible dictionary.

        ``outputs`` is the ``(predictions, probabilities)`` pair produced by
        ``_forward``; only the first entry of each is used.
        """
        labels, scores = outputs

        # Strip the "__label__" marker from the first predicted label.
        top_label = labels[0][0].replace("__label__", "")

        # Plain Python float so the value can be serialized to JSON.
        top_score = float(scores[0][0])

        return {"label": top_label, "confidence": round(top_score * 100, 2)}