roemmele
/

falcon-40b-loss-score

Inference Endpoints

Model card Files and versions Community

roemmele committed on Jul 6, 2023

Commit

5dcb1e5

·

1 Parent(s): b3ecc0b

Upload handler.py

Files changed (1) hide show

handler.py +6 -2

handler.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Dict, List, Any
@@ -5,15 +6,18 @@ from typing import Dict, List, Any
 class EndpointHandler():
     def __init__(self, path=""):
         self.model = AutoModelForCausalLM.from_pretrained(path,
                                                           trust_remote_code=True)
         self.tokenizer = AutoTokenizer.from_pretrained(path)
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         input_text = data.pop("inputs", data)
         inputs = self.tokenizer(input_text,
                                 return_tensors="pt")
-        input_ids = inputs.input_ids
-        attention_mask = inputs.attention_mask
         score = self.model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            labels=input_ids).loss.item()

+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Dict, List, Any
 class EndpointHandler():
     def __init__(self, path=""):
         self.model = AutoModelForCausalLM.from_pretrained(path,
+                                                          torch_dtype=torch.float16,
+                                                          device_map="auto",
                                                           trust_remote_code=True)
         self.tokenizer = AutoTokenizer.from_pretrained(path)
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         input_text = data.pop("inputs", data)
         inputs = self.tokenizer(input_text,
                                 return_tensors="pt")
+        input_ids = inputs.input_ids.to(self.device)
+        attention_mask = inputs.attention_mask.to(self.device)
         score = self.model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            labels=input_ids).loss.item()