TensorTemplar committed
Commit 5a2298c · 1 parent: 2ad6c1a

add fp8 dynamic quants in hf

config.json ADDED
@@ -0,0 +1,82 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": null,
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.463543865167781,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed",
+    "sparsity_config": {}
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 128256
+}
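The `quantization_config` above describes an FP8 scheme in the compressed-tensors format: Linear-layer weights are statically quantized to FP8 per channel (symmetric, minmax observer), input activations are quantized to FP8 dynamically per token at runtime, and `lm_head` is left unquantized. A minimal serving sketch follows; the repo id is a placeholder, and it assumes a runtime with compressed-tensors support such as vLLM, which reads `quant_method` from config.json automatically.

```python
# Minimal sketch: serving this FP8 dynamic checkpoint with vLLM.
# "org/Llama-3.1-70B-Instruct-FP8-Dynamic" is a placeholder repo id (assumption).
from vllm import LLM, SamplingParams

llm = LLM(model="org/Llama-3.1-70B-Instruct-FP8-Dynamic")
params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=128)
outputs = llm.generate(["Explain FP8 dynamic quantization in one sentence."], params)
print(outputs[0].outputs[0].text)
```

Because the activation scales are computed per token at runtime (`"dynamic": true`), no activation calibration data is baked into the checkpoint; only the static per-channel weight scales are stored.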
generation_config.json ADDED
@@ -0,0 +1,12 @@
+{
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.47.1"
+}
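generation_config.json carries the repository's sampling defaults: sampling enabled, temperature 0.6, top-p 0.9, and the three Llama 3.1 stop tokens. `generate` picks these up automatically when the model is loaded from the repo, so they need not be respecified; a sketch, again with a placeholder repo id and assuming a transformers install with the compressed-tensors package available:

```python
# Sketch: generation_config.json is loaded alongside the model, so do_sample,
# temperature, top_p, and the eos_token_id list apply without being respecified.
# The repo id is a placeholder (assumption).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "org/Llama-3.1-70B-Instruct-FP8-Dynamic"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype="auto", device_map="auto"
)

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)  # uses the defaults above
print(tokenizer.decode(out[0], skip_special_tokens=True))
```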
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2178b65750d64f607cc68b797cae6f61752a66529f08d62f6d247852efcff43c
+size 4819958480
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50b2607d674930d06afb0ce658f5e6860fa3cc3fc63452f5ef4ad280d46171e3
+size 4983969544
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:973fb6fcd9aeb9987fe67857d18ee4d6bd7ebf01d76d0f1d12424a6188d9c14b
+size 4900112688
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93a308440f8d8b3de1148ca84749607193c9acf87889c4a7798b6cef1bae6179
+size 4900145696
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:159d0d72173d14b1c2c7dfb8ddd39d89b90bf2f7e286102d2b34731ac738605f
+size 4900104736
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58389d9cdaca5814293099b35b523e8b8649f21158bf664ccfe0bc923a7492dc
+size 4983969632
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd44585e011c8c35eb56a8512f18c15b58f2734834848fd8ab8203123d3cbb3
+size 4900112704
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4de115594b16030cdb698a42be5d7ed7934acec764ae3ec5b024ae1c7e30808
+size 4900145696
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3e653d3e1a60432aa59f3bff32f39953b4f5d276c112e7117ffa48f28dc39e
+size 4900104736
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee070c5e1d05f631c5eef82b18fe84e22ee9d9307edbf9ea68651c7699130f29
+size 4983969632
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf4aa47cf3e2e801e0e0a35adfc9e09813ca07ff2a317365e3221d8aee972c48
+size 4900112704
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945eeeb742dc1ba191107c31b61dc7d1ca0304c31b70c01027c61d141ea53c81
+size 4900145696
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785cf9e853cf9f90db7879f5f7c840334941231e97be7068074f1de1cb43e5d4
+size 4900104736
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da013113af82acf5ac31cee63b0c452c8488549e2809993fbf1f0294de93562d
+size 4983969632
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83f55cc57d0cd9f99d2486045b8382401e94a0fa17520bbd5377661e79947a60
+size 3813027648
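Each shard above is committed as a Git LFS pointer: three lines of text recording the spec version, the SHA-256 of the actual payload, and its byte size (the fifteen shards total roughly 72 GB). A downloaded shard can be checked against its pointer; the hash and size below are the real values for the first shard, while the local path is an assumption:

```python
# Sketch: verify a downloaded shard against its LFS pointer (oid + size).
# The file path is a placeholder; oid and size are copied from the pointer above.
import hashlib
from pathlib import Path

path = Path("model-00001-of-00015.safetensors")
expected_oid = "2178b65750d64f607cc68b797cae6f61752a66529f08d62f6d247852efcff43c"
expected_size = 4819958480

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert path.stat().st_size == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")
```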
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
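model.safetensors.index.json, too large to render in the diff, is the standard sharded-checkpoint index: a `metadata.total_size` entry plus a `weight_map` from each tensor name to the shard file holding it. A sketch of inspecting a downloaded copy:

```python
# Sketch: inspect model.safetensors.index.json to see the tensor -> shard mapping.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print("total size (bytes):", index["metadata"]["total_size"])
per_shard = Counter(index["weight_map"].values())
for shard, n_tensors in sorted(per_shard.items()):
    print(f"{shard}: {n_tensors} tensors")
```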