nm-testing
/

TinyLlama-1.1B-Chat-v1.0-sparse2of4_fp8_dynamic-e2e

compressed-tensors

Model card · Files and versions · Community

nm-autobot committed 23 days ago

Commit

0ab66cd

·

verified ·

1 Parent(s): 35d55cb

Upload folder using huggingface_hub

Files changed (2) hide show

model.safetensors +1 -1
recipe.yaml +0 -4

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cad9112b7a50ba66c8e8c0bb2f0bdb17100fd81927a8466bd107131ff746422
 size 868745640

 version https://git-lfs.github.com/spec/v1
+oid sha256:a72ebf9b26b0422fdc8dd9900509f19a713490f5ae9a30fd28b182b94d0ae90a
 size 868745640

recipe.yaml CHANGED Viewed

@@ -8,10 +8,6 @@ sparsity_stage:
   run_type: &id001 !!python/object/apply:llmcompressor.recipe.stage.StageRunType [oneshot]
 quantization_stage:
   quantization_modifiers:
-    ConstantPruningModifier:
-      targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight',
-        're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight']
-      start: 0
     QuantizationModifier:
       targets: [Linear]
       ignore: [lm_head]

   run_type: &id001 !!python/object/apply:llmcompressor.recipe.stage.StageRunType [oneshot]
 quantization_stage:
   quantization_modifiers:
     QuantizationModifier:
       targets: [Linear]
       ignore: [lm_head]