TensorTemplar committed
Commit 5a2298c · 1 parent: 2ad6c1a

add fp8 dynamic quants in hf

config.json ADDED
@@ -0,0 +1,82 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": null,
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.463543865167781,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed",
+    "sparsity_config": {}
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 128256
+}
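The `quantization_config` above describes an FP8 scheme in the compressed-tensors format: Linear-layer weights are statically quantized to FP8 per channel (symmetric, minmax observer), input activations are quantized to FP8 dynamically per token at runtime, and `lm_head` is left unquantized. A minimal serving sketch follows; the repo id is a placeholder, and it assumes a runtime with compressed-tensors support such as vLLM, which reads `quant_method` from config.json automatically.

```python
# Minimal sketch: serving this FP8 dynamic checkpoint with vLLM.
# "org/Llama-3.1-70B-Instruct-FP8-Dynamic" is a placeholder repo id (assumption).
from vllm import LLM, SamplingParams

llm = LLM(model="org/Llama-3.1-70B-Instruct-FP8-Dynamic")
params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=128)
outputs = llm.generate(["Explain FP8 dynamic quantization in one sentence."], params)
print(outputs[0].outputs[0].text)
```

Because the activation scales are computed per token at runtime (`"dynamic": true`), no activation calibration data is baked into the checkpoint; only the static per-channel weight scales are stored.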
generation_config.json ADDED
@@ -0,0 +1,12 @@
+{
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.47.1"
+}
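generation_config.json carries the repository's sampling defaults: sampling enabled, temperature 0.6, top-p 0.9, and the three Llama 3.1 stop tokens. `generate` picks these up automatically when the model is loaded from the repo, so they need not be respecified; a sketch, again with a placeholder repo id and assuming a transformers install with the compressed-tensors package available:

```python
# Sketch: generation_config.json is loaded alongside the model, so do_sample,
# temperature, top_p, and the eos_token_id list apply without being respecified.
# The repo id is a placeholder (assumption).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "org/Llama-3.1-70B-Instruct-FP8-Dynamic"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype="auto", device_map="auto"
)

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)  # uses the defaults above
print(tokenizer.decode(out[0], skip_special_tokens=True))
```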
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2178b65750d64f607cc68b797cae6f61752a66529f08d62f6d247852efcff43c
+size 4819958480
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50b2607d674930d06afb0ce658f5e6860fa3cc3fc63452f5ef4ad280d46171e3
+size 4983969544
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:973fb6fcd9aeb9987fe67857d18ee4d6bd7ebf01d76d0f1d12424a6188d9c14b
+size 4900112688
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93a308440f8d8b3de1148ca84749607193c9acf87889c4a7798b6cef1bae6179
+size 4900145696
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:159d0d72173d14b1c2c7dfb8ddd39d89b90bf2f7e286102d2b34731ac738605f
+size 4900104736
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58389d9cdaca5814293099b35b523e8b8649f21158bf664ccfe0bc923a7492dc
+size 4983969632
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd44585e011c8c35eb56a8512f18c15b58f2734834848fd8ab8203123d3cbb3
+size 4900112704
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4de115594b16030cdb698a42be5d7ed7934acec764ae3ec5b024ae1c7e30808
+size 4900145696
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3e653d3e1a60432aa59f3bff32f39953b4f5d276c112e7117ffa48f28dc39e
+size 4900104736
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee070c5e1d05f631c5eef82b18fe84e22ee9d9307edbf9ea68651c7699130f29
+size 4983969632
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf4aa47cf3e2e801e0e0a35adfc9e09813ca07ff2a317365e3221d8aee972c48
+size 4900112704
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945eeeb742dc1ba191107c31b61dc7d1ca0304c31b70c01027c61d141ea53c81
+size 4900145696
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785cf9e853cf9f90db7879f5f7c840334941231e97be7068074f1de1cb43e5d4
+size 4900104736
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da013113af82acf5ac31cee63b0c452c8488549e2809993fbf1f0294de93562d
+size 4983969632
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83f55cc57d0cd9f99d2486045b8382401e94a0fa17520bbd5377661e79947a60
+size 3813027648
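Each shard above is committed as a Git LFS pointer: three lines of text recording the spec version, the SHA-256 of the actual payload, and its byte size (the fifteen shards total roughly 72 GB). A downloaded shard can be checked against its pointer; the hash and size below are the real values for the first shard, while the local path is an assumption:

```python
# Sketch: verify a downloaded shard against its LFS pointer (oid + size).
# The file path is a placeholder; oid and size are copied from the pointer above.
import hashlib
from pathlib import Path

path = Path("model-00001-of-00015.safetensors")
expected_oid = "2178b65750d64f607cc68b797cae6f61752a66529f08d62f6d247852efcff43c"
expected_size = 4819958480

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert path.stat().st_size == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")
```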
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
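model.safetensors.index.json, too large to render in the diff, is the standard sharded-checkpoint index: a `metadata.total_size` entry plus a `weight_map` from each tensor name to the shard file holding it. A sketch of inspecting a downloaded copy:

```python
# Sketch: inspect model.safetensors.index.json to see the tensor -> shard mapping.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print("total size (bytes):", index["metadata"]["total_size"])
per_shard = Counter(index["weight_map"].values())
for shard, n_tensors in sorted(per_shard.items()):
    print(f"{shard}: {n_tensors} tensors")
```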