予測時間を計測できるように

Browse files

Files changed (2) hide show

__pycache__/handler.cpython-311.pyc +0 -0
handler.py +16 -7

__pycache__/handler.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/handler.cpython-311.pyc and b/__pycache__/handler.cpython-311.pyc differ

handler.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import  Dict, List, Any
 # from optimum.onnxruntime import ORTModelForSequenceClassification
 # from transformers import pipeline, AutoTokenizer
 from FlagEmbedding import BGEM3FlagModel
 class EndpointHandler():
     def __init__(self, path="."):
@@ -24,12 +24,21 @@ class EndpointHandler():
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
-        result = self.model.encode(inputs, return_dense=True, return_sparse=False)
         # print(result)
-        dense_vectors = result["dense_vecs"]
-        # sparse_vectors = result["lexical_weights"]
         # defaultdict(<class 'int'>, {'6': 0.09546, '192661': 0.3323})
         # pass inputs with all kwargs in data
@@ -40,13 +49,13 @@ class EndpointHandler():
         # postprocess the prediction
         # レスポンスの型をkey=str, value=floatのdictにする。なお、numpy.float16はjsonに変換できないので、floatに変換する。
-        # sparse_vectors = {str(k): float(v) for k, v in sparse_vectors.items()}
         # レスポンスの型をnumpy.ndarrayから、通常のarrayに変更する
-        dense_vectors = dense_vectors.tolist()
         return [
             [
-                { "outputs": dense_vectors}
             ]
         ]

 # from optimum.onnxruntime import ORTModelForSequenceClassification
 # from transformers import pipeline, AutoTokenizer
 from FlagEmbedding import BGEM3FlagModel
+import time
 class EndpointHandler():
     def __init__(self, path="."):
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
+        # encodeメソッドの実行前に時間を記録
+        start_time = time.time()
+        result = self.model.encode(inputs, return_dense=False, return_sparse=True)
+        # encodeメソッドの実行後に時間を記録
+        end_time = time.time()
         # print(result)
+        # dense_vectors = result["dense_vecs"]
+        # 経過時間を計算
+        elapsed_time = end_time - start_time
+        print(f"Encoding took {elapsed_time:.4f} seconds")
+        sparse_vectors = result["lexical_weights"]
         # defaultdict(<class 'int'>, {'6': 0.09546, '192661': 0.3323})
         # pass inputs with all kwargs in data
         # postprocess the prediction
         # レスポンスの型をkey=str, value=floatのdictにする。なお、numpy.float16はjsonに変換できないので、floatに変換する。
+        sparse_vectors = {str(k): float(v) for k, v in sparse_vectors.items()}
         # レスポンスの型をnumpy.ndarrayから、通常のarrayに変更する
+        # dense_vectors = dense_vectors.tolist()
         return [
             [
+                { "outputs": sparse_vectors}
             ]
         ]