File size: 2,457 Bytes
a03b9b6 4af82f4 473c7e6 a03b9b6 c0a3a1a a03b9b6 4af82f4 90973f8 a03b9b6 4af82f4 473c7e6 a971cda 473c7e6 9aae34a 473c7e6 ae99f99 473c7e6 ae99f99 473c7e6 0d6d4f8 a03b9b6 4af82f4 a03b9b6 0d6d4f8 473c7e6 0d6d4f8 ae99f99 473c7e6 ae99f99 4f6d751 473c7e6 4f6d751 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from typing import Dict, List, Any
# from optimum.onnxruntime import ORTModelForSequenceClassification
# from transformers import pipeline, AutoTokenizer
from FlagEmbedding import BGEM3FlagModel
import time
class EndpointHandler():
    """Inference handler that returns the sparse (lexical) weights of a BGE-M3 model."""

    def __init__(self, path="."):
        # Prepare the model: load it from `path` with use_fp16=True.
        self.model = BGEM3FlagModel(path, use_fp16=True)

    def __call__(self, data: Any) -> List[List[Dict[str, Dict[str, float]]]]:
        """
        Encode the request inputs and return their sparse lexical weights.

        Args:
            data (:obj:`dict`):
                Request payload. The text to encode is taken from the "inputs"
                key (the payload itself is used when that key is absent). A
                "parameters" key is removed from the payload but is not used.
        Return:
            A :obj:`list` holding one inner list per encoded input. For a
            single input string the result looks like
            [[{"outputs": {"6": 0.09546, "192661": 0.3323}}]]:
            - "outputs": the model's lexical weights for that input, with every
              key converted to ``str`` and every value converted to ``float``.
        """
        inputs = data.pop("inputs", data)
        # "parameters" is accepted but currently has no effect on the encoding;
        # it is still removed from the payload, as before.
        data.pop("parameters", None)

        # Time the encode call with a monotonic clock and report the duration.
        started = time.perf_counter()
        result = self.model.encode(
            inputs, return_dense=False, return_sparse=True, max_length=1024
        )
        elapsed_time = time.perf_counter() - started
        print(f"Encoding took {elapsed_time:.4f} seconds")

        lexical_weights = result["lexical_weights"]
        # encode() yields a single mapping for one input string and a sequence
        # of mappings for a list of strings; normalise to a sequence so both
        # request shapes are handled the same way.
        if isinstance(lexical_weights, dict):
            lexical_weights = [lexical_weights]

        return [
            [{"outputs": self._to_json_safe(weights)}]
            for weights in lexical_weights
        ]

    @staticmethod
    def _to_json_safe(weights) -> Dict[str, float]:
        """Return `weights` as a plain dict with ``str`` keys and ``float`` values.

        numpy.float16 values cannot be serialised to JSON, so every value is
        converted to a built-in float.
        """
        return {str(key): float(value) for key, value in weights.items()}
|