Commit 742a64f
Parent(s): 0975dcd
Update handler.py

Moving device assignment to pipeline call

handler.py  +3 -7
handler.py  CHANGED
@@ -10,14 +10,10 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = AutoTokenizer.from_pretrained(path)
-
-        config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
-        config.init_device = 'cuda:0'
-
-        model = AutoModelForCausalLM.from_pretrained(path, config=config, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
         # create inference pipeline
-        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device='cuda:0')
 
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
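
For reference, a minimal sketch of how the updated handler could be exercised locally, assuming handler.py is importable from the working directory and a CUDA device is available; the path and prompt below are placeholders, not part of this commit:

# Hypothetical smoke test for the updated EndpointHandler (not part of this commit).
from handler import EndpointHandler

# Point at a local checkout of the model repository (placeholder path).
handler = EndpointHandler(path=".")

# __call__ pops "inputs" from the payload and runs the text-generation pipeline on cuda:0.
result = handler({"inputs": "Hello, my name is"})
print(result)

This mirrors how Hugging Face Inference Endpoints drives a custom handler: EndpointHandler is constructed once at startup, then each request payload is passed to __call__.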