quantization_stage:
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5
      mask_structure: '2:4'
      sequential_update: false
      sequential_targets: [LlamaDecoderLayer]
      targets: [Linear]
      ignore: ['re:.*lm_head']
      block_size: 128
      dampening_frac: 0.01
      preserve_sparsity_mask: false
      offload_hessians: false
  quantization_modifiers:
    GPTQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: null
            strategy: channel
            block_structure: null
            dynamic: false
            actorder: null
            observer: minmax
            observer_kwargs: {}
          input_activations: null
          output_activations: null
      targets: [Linear]
      ignore: [lm_head]
      sequential_update: true
      block_size: 128
      dampening_frac: 0.01
      offload_hessians: false