Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -32,10 +32,10 @@ class StopOnTokens(StoppingCriteria):
|
|
32 |
|
33 |
def initialize_model():
|
34 |
quantization_config = BitsAndBytesConfig(
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
)
|
40 |
|
41 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
|
@@ -47,7 +47,7 @@ def initialize_model():
|
|
47 |
quantization_config=quantization_config,
|
48 |
torch_dtype=torch.bfloat16,
|
49 |
trust_remote_code=True
|
50 |
-
)
|
51 |
|
52 |
return model, tokenizer
|
53 |
|
|
|
32 |
|
33 |
def initialize_model():
|
34 |
quantization_config = BitsAndBytesConfig(
|
35 |
+
load_in_4bit=True,
|
36 |
+
bnb_4bit_compute_dtype=torch.bfloat16,
|
37 |
+
bnb_4bit_quant_type="nf4",
|
38 |
+
bnb_4bit_use_double_quant=True,
|
39 |
)
|
40 |
|
41 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
|
|
|
47 |
quantization_config=quantization_config,
|
48 |
torch_dtype=torch.bfloat16,
|
49 |
trust_remote_code=True
|
50 |
+
).to("cuda")
|
51 |
|
52 |
return model, tokenizer
|
53 |
|