Conrad Lippert-Zajaczkowski committed
Commit 48f8e5d · Parent(s): e31fd99

test specific

Browse files: handler.py (+3 -2)
handler.py CHANGED

@@ -17,14 +17,15 @@ class EndpointHandler:
     def __init__(self, path=""):
         # load the model
         print('starting to load tokenizer')
-        tokenizer = LlamaTokenizer.from_pretrained("
+        tokenizer = LlamaTokenizer.from_pretrained(".", local_files_only=True)
         print('loaded tokenizer')
         gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
         print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
         model = LlamaForCausalLM.from_pretrained(
-            "
+            ".",
             device_map="auto",
             torch_dtype=dtype,
+            offload_folder="offload"
         )
         gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
         print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')