Upload handler.py
Browse files- handler.py +6 -2
handler.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
2 |
from typing import Dict, List, Any
|
3 |
|
@@ -5,15 +6,18 @@ from typing import Dict, List, Any
|
|
5 |
class EndpointHandler():
|
6 |
def __init__(self, path=""):
|
7 |
self.model = AutoModelForCausalLM.from_pretrained(path,
|
|
|
|
|
8 |
trust_remote_code=True)
|
9 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
|
|
10 |
|
11 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
12 |
input_text = data.pop("inputs", data)
|
13 |
inputs = self.tokenizer(input_text,
|
14 |
return_tensors="pt")
|
15 |
-
input_ids = inputs.input_ids
|
16 |
-
attention_mask = inputs.attention_mask
|
17 |
score = self.model(input_ids=input_ids,
|
18 |
attention_mask=attention_mask,
|
19 |
labels=input_ids).loss.item()
|
|
|
1 |
+
import torch
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
from typing import Dict, List, Any
|
4 |
|
|
|
6 |
class EndpointHandler():
|
7 |
def __init__(self, path=""):
|
8 |
self.model = AutoModelForCausalLM.from_pretrained(path,
|
9 |
+
torch_dtype=torch.float16,
|
10 |
+
device_map="auto",
|
11 |
trust_remote_code=True)
|
12 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
13 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
|
15 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
16 |
input_text = data.pop("inputs", data)
|
17 |
inputs = self.tokenizer(input_text,
|
18 |
return_tensors="pt")
|
19 |
+
input_ids = inputs.input_ids.to(self.device)
|
20 |
+
attention_mask = inputs.attention_mask.to(self.device)
|
21 |
score = self.model(input_ids=input_ids,
|
22 |
attention_mask=attention_mask,
|
23 |
labels=input_ids).loss.item()
|