File size: 1,234 Bytes
5dcb1e5
3c54df5
 
 
 
 
e232f5c
4207d8b
 
0e586ca
5dcb1e5
947b77b
3c54df5
 
5dcb1e5
3c54df5
 
 
 
 
5dcb1e5
 
3c54df5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Dict, List, Any


class EndpointHandler():
    """Inference-endpoint handler that scores text with Falcon-40B.

    Loads the causal LM 8-bit quantized and sharded across available
    devices, and on each call returns the model's language-modeling loss
    (mean cross-entropy) for the supplied text — usable as a fluency /
    perplexity-style score (lower is more probable).
    """

    def __init__(self, path=""):
        # Honor the caller-supplied checkpoint path; fall back to the hub
        # repo id only when none is given. (Previously the argument was
        # unconditionally overwritten, so local checkpoints could never load.)
        path = path or "tiiuae/falcon-40b"
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            torch_dtype=torch.bfloat16,
            device_map="auto",        # shard across available GPUs / CPU
            load_in_8bit=True,        # bitsandbytes 8-bit quantization
            trust_remote_code=True,   # Falcon ships custom modeling code
        )
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def __call__(self, data: Dict[str, Any]) -> float:
        """Return the LM loss of the request's input text.

        Parameters:
            data: request payload; the text is read from ``data["inputs"]``
                (the whole payload is used as the text if the key is absent —
                NOTE(review): that fallback passes a dict to the tokenizer;
                presumably callers always supply ``"inputs"``).

        Returns:
            The scalar cross-entropy loss of the text under the model
            (labels == inputs, i.e. standard causal-LM scoring).
        """
        input_text = data.pop("inputs", data)
        encoded = self.tokenizer(input_text, return_tensors="pt")
        input_ids = encoded.input_ids.to(self.device)
        attention_mask = encoded.attention_mask.to(self.device)
        # Inference only: disable autograd so no gradient graph or
        # activations are retained for a 40B-parameter model.
        with torch.no_grad():
            loss = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=input_ids,
            ).loss
        return loss.item()