Daemontatox committed on
Commit
93c26ac
·
verified ·
1 Parent(s): c357bf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -20,7 +20,7 @@ from transformers import (
20
  )
21
 
22
  # Configuration Constants
23
- MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
24
 
25
 
26
  # Understand]: Analyze the question to identify key details and clarify the goal.
@@ -78,10 +78,10 @@ h3 {
78
  def initialize_model():
79
  """Initialize the model with appropriate configurations"""
80
  quantization_config = BitsAndBytesConfig(
81
- load_in_8bit=True,
82
- bnb_8bit_compute_dtype=torch.bfloat16,
83
- bnb_8bit_quant_type="nf4",
84
- bnb_8bit_use_double_quant=True
85
  )
86
 
87
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
@@ -94,7 +94,7 @@ def initialize_model():
94
  device_map="cuda",
95
  attn_implementation="flash_attention_2",
96
  trust_remote_code=True,
97
- #quantization_config=quantization_config
98
 
99
  )
100
 
 
20
  )
21
 
22
  # Configuration Constants
23
+ MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
24
 
25
 
26
  # Understand]: Analyze the question to identify key details and clarify the goal.
 
78
  def initialize_model():
79
  """Initialize the model with appropriate configurations"""
80
  quantization_config = BitsAndBytesConfig(
81
+ load_in_4bit=True,
82
+ bnb_4bit_compute_dtype=torch.bfloat16,
83
+ bnb_4bit_quant_type="nf4",
84
+ bnb_4bit_use_double_quant=True
85
  )
86
 
87
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
 
94
  device_map="cuda",
95
  attn_implementation="flash_attention_2",
96
  trust_remote_code=True,
97
+ quantization_config=quantization_config
98
 
99
  )
100