from typing import  Dict, List, Any
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler():
    """Callable handler that answers questions with an ONNX Runtime QA pipeline.

    The model, tokenizer and pipeline are built once in ``__init__``; each
    call then forwards one request payload to that pipeline.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer from ``path`` and build the pipeline.

        Args:
            path: location handed to both ``from_pretrained`` calls; the model
                is read from the file ``model_optimized_quantized.onnx``.
        """
        # load the optimized model
        self.model = ORTModelForQuestionAnswering.from_pretrained(path, file_name="model_optimized_quantized.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # create pipeline
        self.pipeline = pipeline("question-answering", model=self.model, tokenizer=self.tokenizer)

    def __call__(self, data: Any) -> Any:
        """Run the question-answering pipeline on one request payload.

        Args:
            data: mapping describing the request. Two layouts are accepted:

                * ``{"inputs": {...}, "parameters": {...}}`` -- ``inputs`` holds
                  the pipeline's keyword arguments (presumably ``question`` and
                  ``context`` -- confirm against the pipeline in use) and the
                  optional ``parameters`` holds extra keyword arguments.
                * a flat mapping without ``"inputs"``, which is itself used as
                  the pipeline's keyword arguments (minus ``"parameters"``).

        Returns:
            Whatever the pipeline returns, unchanged.

        Raises:
            TypeError: if the inputs or the parameters are not mappings.
        """
        from collections.abc import Mapping

        parameters = data.get("parameters")
        if parameters is None:
            parameters = {}
        elif not isinstance(parameters, Mapping):
            raise TypeError(
                f"'parameters' must be a mapping, got {type(parameters).__name__}"
            )

        if "inputs" in data:
            inputs = data["inputs"]
        else:
            # Flat payload: every key except "parameters" is a pipeline
            # argument. Build a new dict so the caller's payload is not mutated.
            inputs = {key: value for key, value in data.items() if key != "parameters"}

        if not isinstance(inputs, Mapping):
            raise TypeError(
                f"'inputs' must be a mapping of pipeline arguments, got {type(inputs).__name__}"
            )

        # Inputs take precedence over parameters on a key clash, so a stray
        # parameter can never overwrite the actual question/context.
        call_kwargs = {**parameters, **inputs}
        # run the model
        return self.pipeline(**call_kwargs)