ruslanmv committed on
Commit
22e4d02
·
1 Parent(s): d4ba9d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -34,10 +34,11 @@ print(f"Using device: {DEVICE}")
34
  print(f"Low memory: {LOW_MEMORY}")
35
 
36
  # Quantization configuration for efficient model loading
37
- quantization_config = BitsAndBytesConfig(
38
- load_in_4bit=True,
39
- bnb_4bit_compute_dtype=torch.float16
40
- )
 
41
 
42
  # Load models only once
43
  processor = AutoProcessor.from_pretrained(MODEL_ID)
 
34
  print(f"Low memory: {LOW_MEMORY}")
35
 
36
  # Quantization configuration for efficient model loading
37
+ # Define BitsAndBytesConfig
38
+ quantization_config = BitsAndBytesConfig(load_in_4bit=True,
39
+ bnb_4bit_quant_type="nf4",
40
+ bnb_4bit_compute_dtype=torch.float16)
41
+
42
 
43
  # Load models only once
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)