Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ from transformers import (
     StoppingCriteriaList
 )
 
-MODEL_ID = "
+MODEL_ID = "Daemontatox/mini-Cogito-R1"
 
 DEFAULT_SYSTEM_PROMPT = """
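The only change in this hunk swaps the checkpoint the Space serves to Daemontatox/mini-Cogito-R1. A hypothetical smoke test, not part of app.py, for confirming the new repo id resolves on the Hub before deploying:

from transformers import AutoTokenizer

# Hypothetical pre-deploy check: pull just the tokenizer to confirm the
# repo id "Daemontatox/mini-Cogito-R1" exists and is accessible.
tokenizer = AutoTokenizer.from_pretrained(
    "Daemontatox/mini-Cogito-R1",
    trust_remote_code=True,
)
print(type(tokenizer).__name__)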
@@ -60,7 +60,7 @@ def initialize_model():
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         device_map="cuda",
-        quantization_config=quantization_config,
+        #quantization_config=quantization_config,
         torch_dtype=torch.bfloat16,
         trust_remote_code=True,
         attn_implementation="flash_attention_2"
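This hunk turns off quantized loading: with quantization_config commented out, from_pretrained loads full bfloat16 weights onto the GPU, which raises VRAM use (roughly fourfold versus a 4-bit setup) but avoids quantization error. The commented-out variable is typically a transformers.BitsAndBytesConfig; a minimal sketch assuming common 4-bit NF4 settings, since the Space's actual values are not visible in this diff:

import torch
from transformers import BitsAndBytesConfig

# Assumed 4-bit NF4 settings of the kind the commented-out argument would
# reference; the real config defined earlier in app.py is not shown here.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)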
@@ -97,7 +97,7 @@ def apply_llama3_chat_template(conversation, add_generation_prompt=True):
         elif role == "USER":
             prompt += "<|USER|>\n" + msg["content"].strip() + "\n"
         elif role == "ASSISTANT":
-            prompt += "<|ASSISTANT|>\n" + msg["content"].strip() + "
+            prompt += "<|ASSISTANT|>\n" + msg["content"].strip() + "<think>\n"
     if add_generation_prompt:
         prompt += "<|ASSISTANT|>\n"
     return prompt
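The changed template line now appends an opening <think> tag after each earlier assistant turn, presumably so the conversation history matches the explicit-reasoning format this model emits. A runnable sketch of the function around this change; everything outside the lines visible in the diff (the SYSTEM branch and the loop structure) is an assumption:

def apply_llama3_chat_template(conversation, add_generation_prompt=True):
    # Sketch: only the USER/ASSISTANT lines below appear in the diff itself.
    prompt = ""
    for msg in conversation:
        role = msg["role"].upper()
        if role == "SYSTEM":  # assumed branch, not visible in this diff
            prompt += "<|SYSTEM|>\n" + msg["content"].strip() + "\n"
        elif role == "USER":
            prompt += "<|USER|>\n" + msg["content"].strip() + "\n"
        elif role == "ASSISTANT":
            # New in this commit: open a <think> block after the assistant text.
            prompt += "<|ASSISTANT|>\n" + msg["content"].strip() + "<think>\n"
    if add_generation_prompt:
        prompt += "<|ASSISTANT|>\n"
    return prompt

For example, apply_llama3_chat_template([{"role": "user", "content": "Hello!"}]) returns "<|USER|>\nHello!\n<|ASSISTANT|>\n", leaving the model to generate from the assistant header.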