File size: 2,457 Bytes

a03b9b6
4af82f4
 
 
473c7e6
a03b9b6
 
c0a3a1a
a03b9b6
4af82f4
90973f8
a03b9b6
 
 
 
 
 
 
 
 
 
 
 
 
 
4af82f4
473c7e6
 
 
a971cda
473c7e6
 
 
9aae34a
473c7e6
ae99f99
473c7e6
 
 
ae99f99
473c7e6
0d6d4f8
a03b9b6
 
4af82f4
 
 
 
a03b9b6
0d6d4f8
 
473c7e6
0d6d4f8
ae99f99
473c7e6
ae99f99
4f6d751
 
473c7e6
4f6d751

from typing import  Dict, List, Any
# from optimum.onnxruntime import ORTModelForSequenceClassification
# from transformers import pipeline, AutoTokenizer
from FlagEmbedding import BGEM3FlagModel
import time

class EndpointHandler:
    """Endpoint handler that returns BGE-M3 sparse (lexical) weights for text."""

    def __init__(self, path="."):
        """Load the BGE-M3 model from ``path`` with ``use_fp16=True``.

        Args:
            path: Location of the model, passed straight to ``BGEM3FlagModel``.
        """
        self.model = BGEM3FlagModel(path, use_fp16=True)

    def __call__(self, data: Any) -> List[List[Dict[str, Dict[str, float]]]]:
        """Encode the request text(s) and return their sparse lexical weights.

        Args:
            data: Request payload. The text to encode is read from (and popped
                off) its ``"inputs"`` key; when that key is absent the payload
                itself is handed to the encoder. Any ``"parameters"`` entry is
                ignored.

        Returns:
            A list holding one inner list per encoded text. Each inner list
            contains a single dict ``{"outputs": weights}`` where ``weights``
            maps a stringified key to a plain ``float``, e.g.
            ``[[{"outputs": {"6": 0.09546, "192661": 0.3323}}]]``.
            A single string input therefore yields exactly one inner list.
        """
        inputs = data.pop("inputs", data)

        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments while encoding is running.
        started = time.perf_counter()
        result = self.model.encode(
            inputs, return_dense=False, return_sparse=True, max_length=1024
        )
        elapsed_time = time.perf_counter() - started
        print(f"Encoding took {elapsed_time:.4f} seconds")

        lexical_weights = result["lexical_weights"]
        # The encoder hands back a bare mapping for a single string but a
        # sequence of mappings for a batch; normalise to a sequence so both
        # shapes go through the same conversion below.
        if isinstance(lexical_weights, dict):
            lexical_weights = [lexical_weights]

        # Keys become str and values become built-in float because values such
        # as numpy.float16 cannot be serialised to JSON.
        return [
            [{"outputs": {str(key): float(value) for key, value in weights.items()}}]
            for weights in lexical_weights
        ]