Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -30,20 +30,18 @@ import gradio as gr
|
|
30 |
from PIL import Image
|
31 |
import torch
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# bnb_4bit_quant_type="nf4",
|
41 |
-
# )
|
42 |
|
43 |
# Load the model and processor
|
44 |
model = Qwen2VLForConditionalGeneration.from_pretrained(
|
45 |
"Qwen/QVQ-72B-Preview", device_map="auto",
|
46 |
-
|
47 |
offload_folder="offload",
|
48 |
)
|
49 |
processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
|
|
|
30 |
from PIL import Image
|
31 |
import torch
|
32 |
|
33 |
+
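# Create a configuration for quantization. NOTE(review): BitsAndBytesConfig defines the compute-dtype / double-quant / quant-type options only as bnb_4bit_* and "nf4" is a 4-bit type, so the bnb_8bit_* arguments below are presumably ignored and only load_in_8bit takes effect -- confirm, then drop them or switch to load_in_4bit with the bnb_4bit_* names.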
|
34 |
+
quantization_config = BitsAndBytesConfig(
|
35 |
+
load_in_8bit=True,
|
36 |
+
bnb_8bit_compute_dtype="float16",
|
37 |
+
bnb_8bit_use_double_quant=True,
|
38 |
+
bnb_8bit_quant_type="nf4",
|
39 |
+
)
|
|
|
|
|
40 |
|
41 |
# Load the model and processor
|
42 |
model = Qwen2VLForConditionalGeneration.from_pretrained(
|
43 |
"Qwen/QVQ-72B-Preview", device_map="auto",
|
44 |
+
quantization_config=quantization_config,
|
45 |
offload_folder="offload",
|
46 |
)
|
47 |
processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
|