Daemontatox committed on
Commit
32359f6
·
verified ·
1 Parent(s): afeb266

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -32,10 +32,10 @@ class StopOnTokens(StoppingCriteria):
32
 
33
  def initialize_model():
34
  quantization_config = BitsAndBytesConfig(
35
- load_in_8bit=True,
36
- bnb_8bit_compute_dtype=torch.bfloat16,
37
- bnb_8bit_quant_type="nf4",
38
- bnb_8bit_use_double_quant=True,
39
  )
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
@@ -47,7 +47,7 @@ def initialize_model():
47
  quantization_config=quantization_config,
48
  torch_dtype=torch.bfloat16,
49
  trust_remote_code=True
50
- )
51
 
52
  return model, tokenizer
53
 
 
32
 
33
  def initialize_model():
34
  quantization_config = BitsAndBytesConfig(
35
+ load_in_4bit=True,
36
+ bnb_4bit_compute_dtype=torch.bfloat16,
37
+ bnb_4bit_quant_type="nf4",
38
+ bnb_4bit_use_double_quant=True,
39
  )
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
47
  quantization_config=quantization_config,
48
  torch_dtype=torch.bfloat16,
49
  trust_remote_code=True
50
+ ).to("cuda")
51
 
52
  return model, tokenizer
53