Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ from transformers import (
|
|
20 |
)
|
21 |
|
22 |
# Configuration Constants
|
23 |
-
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-
|
24 |
|
25 |
|
26 |
# Understand]: Analyze the question to identify key details and clarify the goal.
|
@@ -78,10 +78,10 @@ h3 {
|
|
78 |
def initialize_model():
|
79 |
"""Initialize the model with appropriate configurations"""
|
80 |
quantization_config = BitsAndBytesConfig(
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
)
|
86 |
|
87 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
@@ -94,7 +94,7 @@ def initialize_model():
|
|
94 |
device_map="cuda",
|
95 |
attn_implementation="flash_attention_2",
|
96 |
trust_remote_code=True,
|
97 |
-
|
98 |
|
99 |
)
|
100 |
|
|
|
20 |
)
|
21 |
|
22 |
# Configuration Constants
|
23 |
+
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
24 |
|
25 |
|
26 |
# Understand]: Analyze the question to identify key details and clarify the goal.
|
|
|
78 |
def initialize_model():
|
79 |
"""Initialize the model with appropriate configurations"""
|
80 |
quantization_config = BitsAndBytesConfig(
|
81 |
+
load_in_4bit=True,
|
82 |
+
bnb_4bit_compute_dtype=torch.bfloat16,
|
83 |
+
bnb_4bit_quant_type="nf4",
|
84 |
+
bnb_4bit_use_double_quant=True
|
85 |
)
|
86 |
|
87 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
|
|
94 |
device_map="cuda",
|
95 |
attn_implementation="flash_attention_2",
|
96 |
trust_remote_code=True,
|
97 |
+
quantization_config=quantization_config
|
98 |
|
99 |
)
|
100 |
|