# Page header captured with the file (not part of the recipe):
# uploaded by RelaxingSnorlax - "Upload folder using huggingface_hub"
# commit b8845f8 (verified)
# Compression recipe with two stages: a pruning stage followed by a
# quantization stage. The nesting below (stage -> modifier group ->
# modifier -> arguments) was reconstructed from a copy whose indentation had
# been lost; in the flat form the three `targets` keys collided as duplicates.

# Stage 1: pruning only.
sparse_only_stage:
  pruning_modifiers:
    SparseGPTModifier:
      sparsity: 0.5
      sequential_update: true
      # Kept quoted so it stays a string: an unquoted 2:4 is read as a
      # base-60 integer by YAML 1.1 parsers.
      mask_structure: '2:4'
      # Single-quoted so the backslash in \d is passed through literally.
      targets: ['re:model.layers.\d*$']

# Stage 2: quantization.
quant_stage:
  quant_modifiers:
    QuantizationModifier:
      ignore: [lm_head]
      targets: [Linear]
      scheme: FP8_DYNAMIC
  # NOTE(review): placed under quant_stage as a sibling of quant_modifiers
  # because it directly follows that group in the original and is not a
  # *_stage key itself - confirm against the upstream recipe.
  constant_pruning_modifiers:
    ConstantPruningModifier:
      targets:
        - 're:.*q_proj.weight'
        - 're:.*k_proj.weight'
        - 're:.*v_proj.weight'
        - 're:.*o_proj.weight'
        - 're:.*gate_proj.weight'
        - 're:.*up_proj.weight'
        - 're:.*down_proj.weight'
      start: 0