Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -59,10 +59,10 @@ h3 {
|
|
59 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
60 |
|
61 |
quantization_config = BitsAndBytesConfig(
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
|
67 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
68 |
model = AutoModelForCausalLM.from_pretrained(
|
@@ -215,7 +215,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
215 |
minimum=0,
|
216 |
maximum=1,
|
217 |
step=0.1,
|
218 |
-
value=0.
|
219 |
label="Temperature",
|
220 |
render=False,
|
221 |
),
|
|
|
59 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
60 |
|
61 |
quantization_config = BitsAndBytesConfig(
|
62 |
+
load_in_8bit=True, # Use 8-bit instead of 4-bit
|
63 |
+
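bnb_8bit_compute_dtype=torch.bfloat16, # NOTE(review): not a documented BitsAndBytesConfig option (only bnb_4bit_compute_dtype exists); likely has no effect - confirm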
|
64 |
+
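bnb_8bit_use_double_quant=False # NOTE(review): not a documented BitsAndBytesConfig option (double quantization is the 4-bit bnb_4bit_use_double_quant); likely has no effect - confirm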
|
65 |
+
)
|
66 |
|
67 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
68 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
215 |
minimum=0,
|
216 |
maximum=1,
|
217 |
step=0.1,
|
218 |
+
value=0.8,
|
219 |
label="Temperature",
|
220 |
render=False,
|
221 |
),
|