# Two-stage compression recipe: a pruning stage followed by a quantization
# stage. Layout is stage -> modifier group -> modifier -> parameters.
# NOTE(review): nesting was reconstructed from a single-line dump; confirm that
# constant_pruning_modifiers belongs under quant_stage as laid out here.

# Stage 1: pruning only.
sparse_only_stage:
  pruning_modifiers:
    SparseGPTModifier:
      # Target sparsity of 0.5 combined with a 2:4 mask structure.
      sparsity: 0.5
      sequential_update: true
      # Keep quoted: a plain 2:4 is read as a base-60 integer by YAML 1.1
      # parsers instead of a string.
      mask_structure: '2:4'
      # Single quotes keep the backslash in \d literal. The 're:' prefix
      # presumably marks the entry as a regex for the consumer - TODO confirm.
      targets: ['re:model.layers.\d*$']

# Stage 2: quantization, with a constant-pruning modifier alongside it.
quant_stage:
  quant_modifiers:
    QuantizationModifier:
      # lm_head is excluded; every other module matching `targets` gets the
      # FP8_DYNAMIC scheme.
      ignore: [lm_head]
      targets: [Linear]
      scheme: FP8_DYNAMIC
  constant_pruning_modifiers:
    # NOTE(review): presumably holds the sparsity pattern from stage 1 fixed
    # on these projection weights while this stage runs - confirm against the
    # consuming library's documentation.
    ConstantPruningModifier:
      targets:
        - 're:.*q_proj.weight'
        - 're:.*k_proj.weight'
        - 're:.*v_proj.weight'
        - 're:.*o_proj.weight'
        - 're:.*gate_proj.weight'
        - 're:.*up_proj.weight'
        - 're:.*down_proj.weight'
      start: 0