philschmid
/

finbert-tone-endpoint-ds

Text Classification

financial-sentiment-analysis

sentiment-analysis

Inference Endpoints

Model card Files Files and versions Community

philschmid committed on Jan 18, 2023

Commit

5d3586d

·

1 Parent(s): d7d7e6d

Update handler.py

Files changed (1) hide show

handler.py +29 -17

handler.py CHANGED Viewed

@@ -1,17 +1,28 @@
 from typing import Dict, List, Any
 from transformers import pipeline
-import mii
 class EndpointHandler():
     def __init__(self, path=""):
-        self.deploy_name = "bert"
-        mii_config = {"dtype": "fp16"}
-        mii.deploy(task='text-classification',
-                   model=path,
-                   deployment_name=deploy_name,
-                   mii_config=mii_config)
-        # create handler for server
-        self.pipeline = mii.mii_query_handle(self.deploy_name)
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -22,12 +33,13 @@ class EndpointHandler():
       Return:
             A :obj:`list` | `dict`: will be serialized and returned
         """
-        # get inputs
-        inputs = data.pop("inputs",data)
-        date = data.pop("date", None)
-        # check if date exists and if it is a holiday
-        if date is not None and date in self.holidays:
-          return [{"label": "happy", "score": 1}]
-        # run normal prediction
-        prediction = self.pipeline(inputs)
         return prediction

 from typing import Dict, List, Any
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification,pipeline
 from transformers import pipeline
+import deepspeed
 class EndpointHandler():
     def __init__(self, path=""):
+        # load model and tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForSequenceClassification.from_pretrained(path)
+        # init deepspeed inference engine
+        ds_model = deepspeed.init_inference(
+            model=model,      # Transformers models
+            mp_size=1,        # Number of GPU
+            dtype=torch.half, # dtype of the weights (fp16)
+            # injection_policy={"BertLayer" : HFBertLayerPolicy}, # replace BertLayer with DS HFBertLayerPolicy
+            replace_method="auto", # Lets DS automatically identify the layer to replace
+            replace_with_kernel_inject=True, # replace the model with the kernel injector
+        )
+        # create accelerated pipeline
+        self.pipeline = pipeline("text-classification", model=ds_model, tokenizer=tokenizer, device=0)
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
       Return:
             A :obj:`list` | `dict`: will be serialized and returned
         """
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
+        else:
+            prediction = self.pipeline(inputs)
+        # postprocess the prediction
         return prediction