Di Zhang
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ model_path = snapshot_download(
|
|
12 |
)
|
13 |
|
14 |
tokenizer = LlamaTokenizer.from_pretrained(model_path)
|
15 |
-
model = LlamaForCausalLM.from_pretrained(model_path)
|
16 |
|
17 |
DESCRIPTION = '''
|
18 |
# SimpleBerry/LLaMA-O1-Supervised-1129 | Duplicate the space and set it to private for faster & personal inference for free.
|
@@ -35,6 +35,7 @@ def llama_o1_template(data):
|
|
35 |
text = template.format(content=data)
|
36 |
return text
|
37 |
|
|
|
38 |
def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
|
39 |
input_text = llama_o1_template(message)
|
40 |
inputs = tokenizer(input_text, return_tensors="pt")
|
|
|
12 |
)
|
13 |
|
14 |
tokenizer = LlamaTokenizer.from_pretrained(model_path)
|
15 |
+
model = LlamaForCausalLM.from_pretrained(model_path,device_map='auto')
|
16 |
|
17 |
DESCRIPTION = '''
|
18 |
# SimpleBerry/LLaMA-O1-Supervised-1129 | Duplicate the space and set it to private for faster & personal inference for free.
|
|
|
35 |
text = template.format(content=data)
|
36 |
return text
|
37 |
|
38 |
+
@spaces.GPU
|
39 |
def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
|
40 |
input_text = llama_o1_template(message)
|
41 |
inputs = tokenizer(input_text, return_tensors="pt")
|