jojo1899 committed on
Commit
d7dcec5
·
1 Parent(s): 653faf2

Quantized using nncf 2.13.0

Browse files
README.md CHANGED
@@ -7,14 +7,12 @@ tags:
7
 
8
  This is an INT4 quantized version of the `mistralai/Mistral-7B-Instruct-v0.2` model. The Python packages used in creating this model are as follows:
9
  ```
10
- openvino==2024.3.0.dev20240528
11
- openvino-nightly==2024.3.0.dev20240528
12
- openvino-tokenizers==2024.3.0.0.dev20240528
13
- optimum==1.19.2
14
- optimum-intel==1.17.0.dev0+aefabf0
15
- nncf==2.11.0.dev0+90a7f0d5
16
- torch==2.3.0+cu121
17
- transformers==4.40.2
18
  ```
19
  This quantized model is created using the following command:
20
  ```
@@ -25,5 +23,5 @@ For more details, run the following command from your Python environment: `optim
25
  INFO:nncf:Statistics of the bitwidth distribution:
26
  | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
27
  |----------------|-----------------------------|----------------------------------------|
28
- | 8 | 23% (82 / 226) | 20% (80 / 224) |
29
- | 4 | 77% (144 / 226) | 80% (144 / 224) |
 
7
 
8
  This is an INT4 quantized version of the `mistralai/Mistral-7B-Instruct-v0.2` model. The Python packages used in creating this model are as follows:
9
  ```
10
+ openvino==2024.4.0
11
+ optimum==1.23.3
12
+ optimum-intel==1.20.1
13
+ nncf==2.13.0
14
+ torch==2.5.1
15
+ transformers==4.46.1
 
 
16
  ```
17
  This quantized model is created using the following command:
18
  ```
 
23
  INFO:nncf:Statistics of the bitwidth distribution:
24
  | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
25
  |----------------|-----------------------------|----------------------------------------|
26
+ | 8 | 4% (2 / 226) | 0% (0 / 224) |
27
+ | 4 | 96% (224 / 226) | 100% (224 / 224) |
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
3
  "architectures": [
4
  "MistralForCausalLM"
@@ -6,6 +7,7 @@
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
 
9
  "hidden_act": "silu",
10
  "hidden_size": 4096,
11
  "initializer_range": 0.02,
@@ -19,7 +21,8 @@
19
  "rope_theta": 1000000.0,
20
  "sliding_window": null,
21
  "tie_word_embeddings": false,
22
- "transformers_version": "4.40.2",
 
23
  "use_cache": true,
24
  "vocab_size": 32000
25
  }
 
1
  {
2
+ "_attn_implementation_autoset": true,
3
  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
4
  "architectures": [
5
  "MistralForCausalLM"
 
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 1,
9
  "eos_token_id": 2,
10
+ "head_dim": 128,
11
  "hidden_act": "silu",
12
  "hidden_size": 4096,
13
  "initializer_range": 0.02,
 
21
  "rope_theta": 1000000.0,
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.46.1",
26
  "use_cache": true,
27
  "vocab_size": 32000
28
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.40.2"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.46.1"
6
  }
openvino_detokenizer.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f1b4bef7353cedf755dc0e05111ab3a98cb80f5ae9fa31fcd41df5761614685
3
- size 493443
 
 
 
 
openvino_detokenizer.xml DELETED
@@ -1,97 +0,0 @@
1
- <?xml version="1.0"?>
2
- <net name="detokenizer" version="11">
3
- <layers>
4
- <layer id="0" name="Parameter_309056" type="Parameter" version="opset1">
5
- <data shape="?,?" element_type="i64" />
6
- <output>
7
- <port id="0" precision="I64" names="Parameter_309056">
8
- <dim>-1</dim>
9
- <dim>-1</dim>
10
- </port>
11
- </output>
12
- </layer>
13
- <layer id="1" name="Constant_309036" type="Const" version="opset1">
14
- <data element_type="u8" shape="493443" offset="0" size="493443" />
15
- <output>
16
- <port id="0" precision="U8">
17
- <dim>493443</dim>
18
- </port>
19
- </output>
20
- </layer>
21
- <layer id="2" name="Convert_309066" type="Convert" version="opset1">
22
- <data destination_type="i32" />
23
- <input>
24
- <port id="0" precision="I64">
25
- <dim>-1</dim>
26
- <dim>-1</dim>
27
- </port>
28
- </input>
29
- <output>
30
- <port id="1" precision="I32">
31
- <dim>-1</dim>
32
- <dim>-1</dim>
33
- </port>
34
- </output>
35
- </layer>
36
- <layer id="3" name="SentencepieceDetokenizer_309057" type="SentencepieceDetokenizer" version="extension">
37
- <input>
38
- <port id="0" precision="U8">
39
- <dim>493443</dim>
40
- </port>
41
- <port id="1" precision="I32">
42
- <dim>-1</dim>
43
- <dim>-1</dim>
44
- </port>
45
- </input>
46
- <output>
47
- <port id="2" precision="I32">
48
- <dim>-1</dim>
49
- </port>
50
- <port id="3" precision="I32">
51
- <dim>-1</dim>
52
- </port>
53
- <port id="4" precision="U8">
54
- <dim>-1</dim>
55
- </port>
56
- </output>
57
- </layer>
58
- <layer id="4" name="StringTensorPack_309058" type="StringTensorPack" version="extension">
59
- <data mode="begins_ends" />
60
- <input>
61
- <port id="0" precision="I32">
62
- <dim>-1</dim>
63
- </port>
64
- <port id="1" precision="I32">
65
- <dim>-1</dim>
66
- </port>
67
- <port id="2" precision="U8">
68
- <dim>-1</dim>
69
- </port>
70
- </input>
71
- <output>
72
- <port id="3" precision="STRING" names="string_output">
73
- <dim>-1</dim>
74
- </port>
75
- </output>
76
- </layer>
77
- <layer id="5" name="Result_309059" type="Result" version="opset1">
78
- <input>
79
- <port id="0" precision="STRING">
80
- <dim>-1</dim>
81
- </port>
82
- </input>
83
- </layer>
84
- </layers>
85
- <edges>
86
- <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
87
- <edge from-layer="1" from-port="0" to-layer="3" to-port="0" />
88
- <edge from-layer="2" from-port="1" to-layer="3" to-port="1" />
89
- <edge from-layer="3" from-port="2" to-layer="4" to-port="0" />
90
- <edge from-layer="3" from-port="3" to-layer="4" to-port="1" />
91
- <edge from-layer="3" from-port="4" to-layer="4" to-port="2" />
92
- <edge from-layer="4" from-port="3" to-layer="5" to-port="0" />
93
- </edges>
94
- <rt_info>
95
- <eos_token_id value="2" />
96
- </rt_info>
97
- </net>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9349f9069b8e2f1c4916fe877250719395b80b3c43ec19f8eeccfb9023c8241d
3
- size 4611173288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d736c7cc65ba0058436bee9e93673888a85c6edef7ec04d591887bb88970711f
3
+ size 3889377328
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4671ca604486debb291edaea68a90715521f1188405b6bf396597e7a1b873117
3
- size 493451
 
 
 
 
openvino_tokenizer.xml DELETED
@@ -1,231 +0,0 @@
1
- <?xml version="1.0"?>
2
- <net name="tokenizer" version="11">
3
- <layers>
4
- <layer id="0" name="string_input" type="Parameter" version="opset1">
5
- <data shape="?" element_type="string" />
6
- <output>
7
- <port id="0" precision="STRING" names="string_input">
8
- <dim>-1</dim>
9
- </port>
10
- </output>
11
- </layer>
12
- <layer id="1" name="Constant_309042" type="Const" version="opset1">
13
- <data element_type="i32" shape="" offset="0" size="4" />
14
- <output>
15
- <port id="0" precision="I32" />
16
- </output>
17
- </layer>
18
- <layer id="2" name="Constant_309036" type="Const" version="opset1">
19
- <data element_type="u8" shape="493443" offset="4" size="493443" />
20
- <output>
21
- <port id="0" precision="U8">
22
- <dim>493443</dim>
23
- </port>
24
- </output>
25
- </layer>
26
- <layer id="3" name="SentencepieceTokenizer_309038" type="SentencepieceTokenizer" version="extension">
27
- <data nbest_size="0" alpha="0" add_bos="true" add_eos="false" reverse="false" />
28
- <input>
29
- <port id="0" precision="U8">
30
- <dim>493443</dim>
31
- </port>
32
- <port id="1" precision="STRING">
33
- <dim>-1</dim>
34
- </port>
35
- </input>
36
- <output>
37
- <port id="2" precision="I64">
38
- <dim>-1</dim>
39
- <dim>2</dim>
40
- </port>
41
- <port id="3" precision="I32">
42
- <dim>-1</dim>
43
- </port>
44
- <port id="4" precision="I64">
45
- <dim>2</dim>
46
- </port>
47
- </output>
48
- </layer>
49
- <layer id="4" name="Broadcast_309043" type="Broadcast" version="opset3">
50
- <data mode="numpy" />
51
- <input>
52
- <port id="0" precision="I32" />
53
- <port id="1" precision="I64">
54
- <dim>2</dim>
55
- </port>
56
- </input>
57
- <output>
58
- <port id="2" precision="I32">
59
- <dim>-1</dim>
60
- <dim>-1</dim>
61
- </port>
62
- </output>
63
- </layer>
64
- <layer id="5" name="Constant_309044" type="Const" version="opset1">
65
- <data element_type="i32" shape="" offset="493447" size="4" />
66
- <output>
67
- <port id="0" precision="I32" />
68
- </output>
69
- </layer>
70
- <layer id="6" name="ShapeOf_309045" type="ShapeOf" version="opset3">
71
- <data output_type="i64" />
72
- <input>
73
- <port id="0" precision="I32">
74
- <dim>-1</dim>
75
- </port>
76
- </input>
77
- <output>
78
- <port id="1" precision="I64">
79
- <dim>1</dim>
80
- </port>
81
- </output>
82
- </layer>
83
- <layer id="7" name="Broadcast_309046" type="Broadcast" version="opset3">
84
- <data mode="numpy" />
85
- <input>
86
- <port id="0" precision="I32" />
87
- <port id="1" precision="I64">
88
- <dim>1</dim>
89
- </port>
90
- </input>
91
- <output>
92
- <port id="2" precision="I32">
93
- <dim>-1</dim>
94
- </port>
95
- </output>
96
- </layer>
97
- <layer id="8" name="ScatterNDUpdate_309050" type="ScatterNDUpdate" version="opset4">
98
- <input>
99
- <port id="0" precision="I32">
100
- <dim>-1</dim>
101
- <dim>-1</dim>
102
- </port>
103
- <port id="1" precision="I64">
104
- <dim>-1</dim>
105
- <dim>2</dim>
106
- </port>
107
- <port id="2" precision="I32">
108
- <dim>-1</dim>
109
- </port>
110
- </input>
111
- <output>
112
- <port id="3" precision="I32">
113
- <dim>-1</dim>
114
- <dim>-1</dim>
115
- </port>
116
- </output>
117
- </layer>
118
- <layer id="9" name="ScatterNDUpdate_309050" type="Convert" version="opset1">
119
- <data destination_type="i64" />
120
- <input>
121
- <port id="0" precision="I32">
122
- <dim>-1</dim>
123
- <dim>-1</dim>
124
- </port>
125
- </input>
126
- <output>
127
- <port id="1" precision="I64" names="attention_mask">
128
- <dim>-1</dim>
129
- <dim>-1</dim>
130
- </port>
131
- </output>
132
- </layer>
133
- <layer id="11" name="Constant_309039" type="Const" version="opset1">
134
- <data element_type="i32" shape="" offset="0" size="4" />
135
- <output>
136
- <port id="0" precision="I32" />
137
- </output>
138
- </layer>
139
- <layer id="12" name="Broadcast_309040" type="Broadcast" version="opset3">
140
- <data mode="numpy" />
141
- <input>
142
- <port id="0" precision="I32" />
143
- <port id="1" precision="I64">
144
- <dim>2</dim>
145
- </port>
146
- </input>
147
- <output>
148
- <port id="2" precision="I32">
149
- <dim>-1</dim>
150
- <dim>-1</dim>
151
- </port>
152
- </output>
153
- </layer>
154
- <layer id="13" name="ScatterNDUpdate_309041" type="ScatterNDUpdate" version="opset4">
155
- <input>
156
- <port id="0" precision="I32">
157
- <dim>-1</dim>
158
- <dim>-1</dim>
159
- </port>
160
- <port id="1" precision="I64">
161
- <dim>-1</dim>
162
- <dim>2</dim>
163
- </port>
164
- <port id="2" precision="I32">
165
- <dim>-1</dim>
166
- </port>
167
- </input>
168
- <output>
169
- <port id="3" precision="I32">
170
- <dim>-1</dim>
171
- <dim>-1</dim>
172
- </port>
173
- </output>
174
- </layer>
175
- <layer id="14" name="ScatterNDUpdate_309041" type="Convert" version="opset1">
176
- <data destination_type="i64" />
177
- <input>
178
- <port id="0" precision="I32">
179
- <dim>-1</dim>
180
- <dim>-1</dim>
181
- </port>
182
- </input>
183
- <output>
184
- <port id="1" precision="I64" names="input_ids">
185
- <dim>-1</dim>
186
- <dim>-1</dim>
187
- </port>
188
- </output>
189
- </layer>
190
- <layer id="15" name="Result_309051" type="Result" version="opset1">
191
- <input>
192
- <port id="0" precision="I64">
193
- <dim>-1</dim>
194
- <dim>-1</dim>
195
- </port>
196
- </input>
197
- </layer>
198
- <layer id="10" name="Result_309052" type="Result" version="opset1">
199
- <input>
200
- <port id="0" precision="I64">
201
- <dim>-1</dim>
202
- <dim>-1</dim>
203
- </port>
204
- </input>
205
- </layer>
206
- </layers>
207
- <edges>
208
- <edge from-layer="0" from-port="0" to-layer="3" to-port="1" />
209
- <edge from-layer="1" from-port="0" to-layer="4" to-port="0" />
210
- <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
211
- <edge from-layer="3" from-port="4" to-layer="4" to-port="1" />
212
- <edge from-layer="3" from-port="3" to-layer="6" to-port="0" />
213
- <edge from-layer="3" from-port="2" to-layer="8" to-port="1" />
214
- <edge from-layer="3" from-port="4" to-layer="12" to-port="1" />
215
- <edge from-layer="3" from-port="2" to-layer="13" to-port="1" />
216
- <edge from-layer="3" from-port="3" to-layer="13" to-port="2" />
217
- <edge from-layer="4" from-port="2" to-layer="8" to-port="0" />
218
- <edge from-layer="5" from-port="0" to-layer="7" to-port="0" />
219
- <edge from-layer="6" from-port="1" to-layer="7" to-port="1" />
220
- <edge from-layer="7" from-port="2" to-layer="8" to-port="2" />
221
- <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
222
- <edge from-layer="9" from-port="1" to-layer="10" to-port="0" />
223
- <edge from-layer="11" from-port="0" to-layer="12" to-port="0" />
224
- <edge from-layer="12" from-port="2" to-layer="13" to-port="0" />
225
- <edge from-layer="13" from-port="3" to-layer="14" to-port="0" />
226
- <edge from-layer="14" from-port="1" to-layer="15" to-port="0" />
227
- </edges>
228
- <rt_info>
229
- <eos_token_id value="2" />
230
- </rt_info>
231
- </net>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -29,10 +30,10 @@
29
  },
30
  "additional_special_tokens": [],
31
  "bos_token": "<s>",
32
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
- "legacy": true,
36
  "model_max_length": 1000000000000000019884624838656,
37
  "pad_token": null,
38
  "sp_model_kwargs": {},
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
30
  },
31
  "additional_special_tokens": [],
32
  "bos_token": "<s>",
33
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
34
  "clean_up_tokenization_spaces": false,
35
  "eos_token": "</s>",
36
+ "legacy": false,
37
  "model_max_length": 1000000000000000019884624838656,
38
  "pad_token": null,
39
  "sp_model_kwargs": {},