fxmarty
/

llama-tiny-w-fp8-a-fp8-o-fp8

Safetensors

llama

quark

Model card Files Files and versions Community

fxmarty committed on Oct 9, 2024

Commit

903d863

verified ·

1 Parent(s): 976dff0

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

config.json +56 -50

config.json CHANGED Viewed

@@ -21,58 +21,64 @@
   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
-    "algo_config": null,
-    "exclude": [
-      "lm_head"
-    ],
-    "export": {
-      "kv_cache_group": [],
-      "pack_method": "reorder",
-      "weight_format": "real_quantized",
-      "weight_merge_groups": null
-    },
-    "global_quant_config": {
-      "bias": null,
-      "input_tensors": {
-        "ch_axis": null,
-        "dtype": "fp8_e4m3",
-        "group_size": null,
-        "is_dynamic": false,
-        "observer_cls": "PerTensorMinMaxObserver",
-        "qscheme": "per_tensor",
-        "round_method": null,
-        "scale_type": null,
-        "symmetric": null
       },
-      "output_tensors": {
-        "ch_axis": null,
-        "dtype": "fp8_e4m3",
-        "group_size": null,
-        "is_dynamic": false,
-        "observer_cls": "PerTensorMinMaxObserver",
-        "qscheme": "per_tensor",
-        "round_method": null,
-        "scale_type": null,
-        "symmetric": null
       },
-      "target_device": null,
-      "weight": {
-        "ch_axis": null,
-        "dtype": "fp8_e4m3",
-        "group_size": null,
-        "is_dynamic": false,
-        "observer_cls": "PerTensorMinMaxObserver",
-        "qscheme": "per_tensor",
-        "round_method": null,
-        "scale_type": null,
-        "symmetric": null
-      }
-    },
-    "layer_quant_config": {},
-    "layer_type_quant_config": {},
-    "pack_method": "reorder",
-    "quant_method": "quark",
-    "quant_mode": 1
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
+    "activation_scheme": "static",
+    "kv_cache_scheme": null,
+    "library": "quark",
+    "quant_method": "fp8",
+    "quark_config": {
+      "algo_config": null,
+      "exclude": [
+        "lm_head"
+      ],
+      "export": {
+        "kv_cache_group": [],
+        "pack_method": "reorder",
+        "weight_format": "real_quantized",
+        "weight_merge_groups": null
       },
+      "global_quant_config": {
+        "bias": null,
+        "input_tensors": {
+          "ch_axis": null,
+          "dtype": "fp8_e4m3",
+          "group_size": null,
+          "is_dynamic": false,
+          "observer_cls": "PerTensorMinMaxObserver",
+          "qscheme": "per_tensor",
+          "round_method": null,
+          "scale_type": null,
+          "symmetric": null
+        },
+        "output_tensors": {
+          "ch_axis": null,
+          "dtype": "fp8_e4m3",
+          "group_size": null,
+          "is_dynamic": false,
+          "observer_cls": "PerTensorMinMaxObserver",
+          "qscheme": "per_tensor",
+          "round_method": null,
+          "scale_type": null,
+          "symmetric": null
+        },
+        "target_device": null,
+        "weight": {
+          "ch_axis": null,
+          "dtype": "fp8_e4m3",
+          "group_size": null,
+          "is_dynamic": false,
+          "observer_cls": "PerTensorMinMaxObserver",
+          "qscheme": "per_tensor",
+          "round_method": null,
+          "scale_type": null,
+          "symmetric": null
+        }
       },
+      "layer_quant_config": {},
+      "layer_type_quant_config": {},
+      "pack_method": "reorder",
+      "quant_method": "quark",
+      "quant_mode": 1
+    }
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,