roemmele committed on
Commit
947b77b
·
1 Parent(s): 0e586ca

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +1 -0
handler.py CHANGED
@@ -8,6 +8,7 @@ class EndpointHandler():
8
  self.model = AutoModelForCausalLM.from_pretrained(path,
9
  torch_dtype=torch.bfloat16,
10
  device_map="auto",
 
11
  trust_remote_code=True)
12
  self.tokenizer = AutoTokenizer.from_pretrained(path)
13
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
8
  self.model = AutoModelForCausalLM.from_pretrained(path,
9
  torch_dtype=torch.bfloat16,
10
  device_map="auto",
11
+ load_in_8bit=True,
12
  trust_remote_code=True)
13
  self.tokenizer = AutoTokenizer.from_pretrained(path)
14
  self.device = "cuda" if torch.cuda.is_available() else "cpu"