Spaces:
Runtime error
Runtime error
Update app_kontext.py
Browse files
- app_kontext.py +8 -8
app_kontext.py
CHANGED
|
@@ -41,17 +41,17 @@ logger = logging.getLogger(__name__)
|
|
| 41 |
|
| 42 |
# TESTING TWO QUANTIZATION METHODS
|
| 43 |
# 1) If FP8 is supported; `torchao` for quantization
|
| 44 |
-
quant_config = PipelineQuantizationConfig(
|
| 45 |
-
quant_backend="torchao",
|
| 46 |
-
quant_kwargs={"quant_type": "float8dq_e4m3_row"},
|
| 47 |
-
components_to_quantize=["transformer"]
|
| 48 |
-
)
|
| 49 |
-
# 2) Otherwise, standard 4-bit quantization with bitsandbytes
|
| 50 |
# quant_config = PipelineQuantizationConfig(
|
| 51 |
-
# quant_backend="bitsandbytes_4bit",
|
| 52 |
-
# quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16, "bnb_4bit_quant_type": "nf4"},
|
| 53 |
# components_to_quantize=["transformer"]
|
| 54 |
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
try:
|
| 57 |
# Set max memory usage for ZeroGPU
|
|
|
|
| 41 |
|
| 42 |
# TESTING TWO QUANTIZATION METHODS
|
| 43 |
# 1) If FP8 is supported; `torchao` for quantization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# quant_config = PipelineQuantizationConfig(
|
| 45 |
+
# quant_backend="torchao",
|
| 46 |
+
# quant_kwargs={"quant_type": "float8dq_e4m3_row"},
|
| 47 |
# components_to_quantize=["transformer"]
|
| 48 |
# )
|
| 49 |
+
# 2) Otherwise, standard 4-bit quantization with bitsandbytes
|
| 50 |
+
quant_config = PipelineQuantizationConfig(
|
| 51 |
+
quant_backend="bitsandbytes_4bit",
|
| 52 |
+
quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16, "bnb_4bit_quant_type": "nf4"},
|
| 53 |
+
components_to_quantize=["transformer"]
|
| 54 |
+
)
|
| 55 |
|
| 56 |
try:
|
| 57 |
# Set max memory usage for ZeroGPU
|