philschmid committed on
Commit
5d3586d
·
1 Parent(s): d7d7e6d

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +29 -17
handler.py CHANGED
@@ -1,17 +1,28 @@
1
  from typing import Dict, List, Any
 
 
2
  from transformers import pipeline
3
- import mii
 
4
 
5
  class EndpointHandler():
6
  def __init__(self, path=""):
7
- self.deploy_name = "bert"
8
- mii_config = {"dtype": "fp16"}
9
- mii.deploy(task='text-classification',
10
- model=path,
11
- deployment_name=deploy_name,
12
- mii_config=mii_config)
13
- # create handler for server
14
- self.pipeline = mii.mii_query_handle(self.deploy_name)
 
 
 
 
 
 
 
 
15
 
16
 
17
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -22,12 +33,13 @@ class EndpointHandler():
22
  Return:
23
  A :obj:`list` | `dict`: will be serialized and returned
24
  """
25
- # get inputs
26
- inputs = data.pop("inputs",data)
27
- date = data.pop("date", None)
28
- # check if date exists and if it is a holiday
29
- if date is not None and date in self.holidays:
30
- return [{"label": "happy", "score": 1}]
31
- # run normal prediction
32
- prediction = self.pipeline(inputs)
 
33
  return prediction
 
1
  from typing import Dict, List, Any
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification,pipeline
4
  from transformers import pipeline
5
+ import deepspeed
6
+
7
 
8
  class EndpointHandler():
9
  def __init__(self, path=""):
10
+ # load model and tokenizer
11
+ tokenizer = AutoTokenizer.from_pretrained(path)
12
+ model = AutoModelForSequenceClassification.from_pretrained(path)
13
+
14
+ # init deepspeed inference engine
15
+ ds_model = deepspeed.init_inference(
16
+ model=model, # Transformers models
17
+ mp_size=1, # Number of GPU
18
+ dtype=torch.half, # dtype of the weights (fp16)
19
+ # injection_policy={"BertLayer" : HFBertLayerPolicy}, # replace BertLayer with DS HFBertLayerPolicy
20
+ replace_method="auto", # Lets DS automatically identify the layer to replace
21
+ replace_with_kernel_inject=True, # replace the model with the kernel injector
22
+ )
23
+
24
+ # create accelerated pipeline
25
+ self.pipeline = pipeline("text-classification", model=ds_model, tokenizer=tokenizer, device=0)
26
 
27
 
28
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
 
33
  Return:
34
  A :obj:`list` | `dict`: will be serialized and returned
35
  """
36
+ inputs = data.pop("inputs", data)
37
+ parameters = data.pop("parameters", None)
38
+
39
+ # pass inputs with all kwargs in data
40
+ if parameters is not None:
41
+ prediction = self.pipeline(inputs, **parameters)
42
+ else:
43
+ prediction = self.pipeline(inputs)
44
+ # postprocess the prediction
45
  return prediction