Daemontatox commited on
Commit
9bab2dc
·
verified ·
1 Parent(s): a60291d

Update app.py: switch quantization from 4-bit NF4 to 8-bit and lower default temperature from 0.9 to 0.8

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -59,10 +59,10 @@ h3 {
59
  device = "cuda" # for GPU usage or "cpu" for CPU usage
60
 
61
  quantization_config = BitsAndBytesConfig(
62
- load_in_4bit=True,
63
- bnb_4bit_compute_dtype=torch.bfloat16,
64
- bnb_4bit_use_double_quant=True,
65
- bnb_4bit_quant_type="nf4")
66
 
67
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
68
  model = AutoModelForCausalLM.from_pretrained(
@@ -215,7 +215,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
215
  minimum=0,
216
  maximum=1,
217
  step=0.1,
218
- value=0.9,
219
  label="Temperature",
220
  render=False,
221
  ),
 
59
  device = "cuda" # for GPU usage or "cpu" for CPU usage
60
 
61
  quantization_config = BitsAndBytesConfig(
62
+ load_in_8bit=True, # Use 8-bit (LLM.int8) instead of 4-bit NF4
63
+ bnb_8bit_compute_dtype=torch.bfloat16, # NOTE: not a valid BitsAndBytesConfig kwarg (only bnb_4bit_compute_dtype exists) — silently ignored as an "unused kwarg"
64
+ bnb_8bit_use_double_quant=False # NOTE: double quantization is 4-bit-only (bnb_4bit_use_double_quant); this kwarg has no effect in 8-bit mode
65
+ )
66
 
67
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
68
  model = AutoModelForCausalLM.from_pretrained(
 
215
  minimum=0,
216
  maximum=1,
217
  step=0.1,
218
+ value=0.8,
219
  label="Temperature",
220
  render=False,
221
  ),