Conrad Lippert-Zajaczkowski committed
Commit · 3555efd
Parent(s): 997becd
inference
Files changed:
- config.json +1 -1
- handler.py +3 -4
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "swapnice/swapnice-openorcaxopenchat-preview2-13b",
   "architectures": [
     "LlamaForCausalLM"
   ],
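For reference, a minimal sketch of how the updated "_name_or_path" field surfaces once the config is loaded with transformers. This usage is an illustrative assumption, not part of the commit:

# Assumed usage sketch: load the repo's config and read the field set above.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("swapnice/swapnice-openorcaxopenchat-preview2-13b")
print(config._name_or_path)  # "swapnice/swapnice-openorcaxopenchat-preview2-13b"
print(config.architectures)  # ["LlamaForCausalLM"]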
handler.py CHANGED
@@ -3,17 +3,16 @@ from typing import Dict, List, Any
 from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
 
 # get dtype
-dtype = torch.float16
+dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = LlamaTokenizer.from_pretrained("
+        tokenizer = LlamaTokenizer.from_pretrained("/repository/tokenizer", local_files_only=True)
         model = LlamaForCausalLM.from_pretrained(
-            "
+            "/repository/pytorch_model",
             device_map="auto",
             torch_dtype=dtype,
-            offload_folder="offload"
         )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
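Taken together, the handler change picks bfloat16 on Ampere-class GPUs (compute capability 8.x) and float16 otherwise, loads the tokenizer and weights from the endpoint's local /repository copy, and drops the disk offload folder. Below is a sketch of the full handler after this commit: __init__ mirrors the diff, while the __call__ body is an assumption (it lies outside the changed hunk) following the usual Hugging Face Inference Endpoints handler contract:

# handler.py after this commit (sketch). __call__ is assumed, not shown in the diff.
from typing import Dict, List, Any

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# bfloat16 on Ampere (compute capability 8.x), float16 on older GPUs
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16


class EndpointHandler:
    def __init__(self, path=""):
        # load tokenizer and weights from the endpoint's local repository copy
        tokenizer = LlamaTokenizer.from_pretrained("/repository/tokenizer", local_files_only=True)
        model = LlamaForCausalLM.from_pretrained(
            "/repository/pytorch_model",
            device_map="auto",
            torch_dtype=dtype,
        )
        # create inference pipeline
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # standard endpoint payload: {"inputs": "...", "parameters": {...}}
        inputs = data.get("inputs", "")
        parameters = data.get("parameters", {})
        return self.pipeline(inputs, **parameters)

A request to the deployed endpoint would then carry a body like {"inputs": "Hello", "parameters": {"max_new_tokens": 64}}. Note that removing offload_folder="offload" disables offloading weights to disk; with device_map="auto", the weights are instead placed across available GPU (and, if needed, CPU) memory.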