Conrad Lippert-Zajaczkowski
committed on
Commit
·
80ef686
1
Parent(s):
48f8e5d
test
Browse files
handler.py
CHANGED
@@ -17,15 +17,16 @@ class EndpointHandler:
|
|
17 |
def __init__(self, path=""):
|
18 |
# load the model
|
19 |
print('starting to load tokenizer')
|
20 |
-
tokenizer = LlamaTokenizer.from_pretrained("
|
21 |
print('loaded tokenizer')
|
22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
24 |
model = LlamaForCausalLM.from_pretrained(
|
25 |
-
"
|
26 |
device_map="auto",
|
27 |
torch_dtype=dtype,
|
28 |
-
offload_folder="offload"
|
|
|
29 |
)
|
30 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
31 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
|
|
17 |
def __init__(self, path=""):
|
18 |
# load the model
|
19 |
print('starting to load tokenizer')
|
20 |
+
tokenizer = LlamaTokenizer.from_pretrained("/repository/orca_tokenizer", local_files_only=True)
|
21 |
print('loaded tokenizer')
|
22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
24 |
model = LlamaForCausalLM.from_pretrained(
|
25 |
+
"/repository/pytorch_model",
|
26 |
device_map="auto",
|
27 |
torch_dtype=dtype,
|
28 |
+
offload_folder="offload",
|
29 |
+
local_files_only=True
|
30 |
)
|
31 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
32 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
added_tokens.json → orca_tokenizer/added_tokens.json
RENAMED
File without changes
|
special_tokens_map.json → orca_tokenizer/special_tokens_map.json
RENAMED
File without changes
|
tokenizer.model → orca_tokenizer/tokenizer.model
RENAMED
File without changes
|
tokenizer_config.json → orca_tokenizer/tokenizer_config.json
RENAMED
File without changes
|