chkiic committed on
Commit
ba891be
·
verified ·
1 Parent(s): a9e2502

add tokenizers

Browse files
openvino_detokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f14f63332691bc4c612d6f6c629c6c547d3224dbe12a58df3bb9f6e191737d0
3
+ size 338966
openvino_detokenizer.xml ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_122" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_122">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Convert_149" type="Convert" version="opset1">
14
+ <data destination_type="i32" />
15
+ <input>
16
+ <port id="0" precision="I64">
17
+ <dim>-1</dim>
18
+ <dim>-1</dim>
19
+ </port>
20
+ </input>
21
+ <output>
22
+ <port id="1" precision="I32">
23
+ <dim>-1</dim>
24
+ <dim>-1</dim>
25
+ </port>
26
+ </output>
27
+ </layer>
28
+ <layer id="2" name="Constant_89" type="Const" version="opset1">
29
+ <data element_type="u8" shape="338927" offset="0" size="338927" />
30
+ <output>
31
+ <port id="0" precision="U8">
32
+ <dim>338927</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="StringTensorUnpack_90" type="StringTensorUnpack" version="extension">
37
+ <data mode="begins_ends" />
38
+ <input>
39
+ <port id="0" precision="U8">
40
+ <dim>338927</dim>
41
+ </port>
42
+ </input>
43
+ <output>
44
+ <port id="1" precision="I32">
45
+ <dim>-1</dim>
46
+ </port>
47
+ <port id="2" precision="I32">
48
+ <dim>-1</dim>
49
+ </port>
50
+ <port id="3" precision="U8">
51
+ <dim>-1</dim>
52
+ </port>
53
+ </output>
54
+ </layer>
55
+ <layer id="4" name="Constant_126" type="Const" version="opset1">
56
+ <data element_type="i32" shape="3" offset="338927" size="12" />
57
+ <output>
58
+ <port id="0" precision="I32">
59
+ <dim>3</dim>
60
+ </port>
61
+ </output>
62
+ </layer>
63
+ <layer id="5" name="Constant_124" type="Const" version="opset1">
64
+ <data element_type="i32" shape="1" offset="338939" size="4" />
65
+ <output>
66
+ <port id="0" precision="I32">
67
+ <dim>1</dim>
68
+ </port>
69
+ </output>
70
+ </layer>
71
+ <layer id="6" name="Constant_123" type="Const" version="opset1">
72
+ <data element_type="i32" shape="1" offset="338943" size="4" />
73
+ <output>
74
+ <port id="0" precision="I32">
75
+ <dim>1</dim>
76
+ </port>
77
+ </output>
78
+ </layer>
79
+ <layer id="7" name="Constant_125" type="Const" version="opset1">
80
+ <data element_type="i32" shape="1" offset="338947" size="4" />
81
+ <output>
82
+ <port id="0" precision="I32">
83
+ <dim>1</dim>
84
+ </port>
85
+ </output>
86
+ </layer>
87
+ <layer id="8" name="Constant_128" type="Const" version="opset1">
88
+ <data element_type="i64" shape="1" offset="338951" size="8" />
89
+ <output>
90
+ <port id="0" precision="I64">
91
+ <dim>1</dim>
92
+ </port>
93
+ </output>
94
+ </layer>
95
+ <layer id="9" name="Slice_127" type="Slice" version="opset8">
96
+ <input>
97
+ <port id="0" precision="I32">
98
+ <dim>3</dim>
99
+ </port>
100
+ <port id="1" precision="I32">
101
+ <dim>1</dim>
102
+ </port>
103
+ <port id="2" precision="I32">
104
+ <dim>1</dim>
105
+ </port>
106
+ <port id="3" precision="I32">
107
+ <dim>1</dim>
108
+ </port>
109
+ <port id="4" precision="I64">
110
+ <dim>1</dim>
111
+ </port>
112
+ </input>
113
+ <output>
114
+ <port id="5" precision="I32">
115
+ <dim>3</dim>
116
+ </port>
117
+ </output>
118
+ </layer>
119
+ <layer id="10" name="VocabDecoder_129" type="VocabDecoder" version="extension">
120
+ <data skip_tokens="" />
121
+ <input>
122
+ <port id="0" precision="I32">
123
+ <dim>-1</dim>
124
+ <dim>-1</dim>
125
+ </port>
126
+ <port id="1" precision="I32">
127
+ <dim>-1</dim>
128
+ </port>
129
+ <port id="2" precision="I32">
130
+ <dim>-1</dim>
131
+ </port>
132
+ <port id="3" precision="U8">
133
+ <dim>-1</dim>
134
+ </port>
135
+ <port id="4" precision="I32">
136
+ <dim>3</dim>
137
+ </port>
138
+ </input>
139
+ <output>
140
+ <port id="5" precision="I32">
141
+ <dim>-1</dim>
142
+ </port>
143
+ <port id="6" precision="I32">
144
+ <dim>-1</dim>
145
+ </port>
146
+ <port id="7" precision="I32">
147
+ <dim>-1</dim>
148
+ </port>
149
+ <port id="8" precision="I32">
150
+ <dim>-1</dim>
151
+ </port>
152
+ <port id="9" precision="U8">
153
+ <dim>-1</dim>
154
+ </port>
155
+ </output>
156
+ </layer>
157
+ <layer id="11" name="Constant_131" type="Const" version="opset1">
158
+ <data element_type="u8" shape="3" offset="338959" size="3" />
159
+ <output>
160
+ <port id="0" precision="U8">
161
+ <dim>3</dim>
162
+ </port>
163
+ </output>
164
+ </layer>
165
+ <layer id="12" name="Constant_133" type="Const" version="opset1">
166
+ <data element_type="u8" shape="1" offset="338962" size="1" />
167
+ <output>
168
+ <port id="0" precision="U8">
169
+ <dim>1</dim>
170
+ </port>
171
+ </output>
172
+ </layer>
173
+ <layer id="13" name="RegexNormalization_134" type="RegexNormalization" version="extension">
174
+ <data global_replace="true" />
175
+ <input>
176
+ <port id="0" precision="I32">
177
+ <dim>-1</dim>
178
+ </port>
179
+ <port id="1" precision="I32">
180
+ <dim>-1</dim>
181
+ </port>
182
+ <port id="2" precision="U8">
183
+ <dim>-1</dim>
184
+ </port>
185
+ <port id="3" precision="U8">
186
+ <dim>3</dim>
187
+ </port>
188
+ <port id="4" precision="U8">
189
+ <dim>1</dim>
190
+ </port>
191
+ </input>
192
+ <output>
193
+ <port id="5" precision="I32">
194
+ <dim>-1</dim>
195
+ </port>
196
+ <port id="6" precision="I32">
197
+ <dim>-1</dim>
198
+ </port>
199
+ <port id="7" precision="U8">
200
+ <dim>-1</dim>
201
+ </port>
202
+ </output>
203
+ </layer>
204
+ <layer id="14" name="ByteFallback_135" type="ByteFallback" version="extension">
205
+ <input>
206
+ <port id="0" precision="I32">
207
+ <dim>-1</dim>
208
+ </port>
209
+ <port id="1" precision="I32">
210
+ <dim>-1</dim>
211
+ </port>
212
+ <port id="2" precision="U8">
213
+ <dim>-1</dim>
214
+ </port>
215
+ </input>
216
+ <output>
217
+ <port id="3" precision="I32">
218
+ <dim>-1</dim>
219
+ </port>
220
+ <port id="4" precision="I32">
221
+ <dim>-1</dim>
222
+ </port>
223
+ <port id="5" precision="U8">
224
+ <dim>-1</dim>
225
+ </port>
226
+ </output>
227
+ </layer>
228
+ <layer id="15" name="FuzeRagged_136" type="FuzeRagged" version="extension">
229
+ <input>
230
+ <port id="0" precision="I32">
231
+ <dim>-1</dim>
232
+ </port>
233
+ <port id="1" precision="I32">
234
+ <dim>-1</dim>
235
+ </port>
236
+ <port id="2" precision="I32">
237
+ <dim>-1</dim>
238
+ </port>
239
+ <port id="3" precision="I32">
240
+ <dim>-1</dim>
241
+ </port>
242
+ </input>
243
+ <output>
244
+ <port id="4" precision="I32">
245
+ <dim>-1</dim>
246
+ </port>
247
+ <port id="5" precision="I32">
248
+ <dim>-1</dim>
249
+ </port>
250
+ </output>
251
+ </layer>
252
+ <layer id="16" name="Constant_138" type="Const" version="opset1">
253
+ <data element_type="u8" shape="2" offset="338963" size="2" />
254
+ <output>
255
+ <port id="0" precision="U8">
256
+ <dim>2</dim>
257
+ </port>
258
+ </output>
259
+ </layer>
260
+ <layer id="17" name="Constant_140" type="Const" version="opset1">
261
+ <data element_type="u8" shape="0" offset="338965" size="1" />
262
+ <output>
263
+ <port id="0" precision="U8">
264
+ <dim>0</dim>
265
+ </port>
266
+ </output>
267
+ </layer>
268
+ <layer id="18" name="RegexNormalization_141" type="RegexNormalization" version="extension">
269
+ <data global_replace="true" />
270
+ <input>
271
+ <port id="0" precision="I32">
272
+ <dim>-1</dim>
273
+ </port>
274
+ <port id="1" precision="I32">
275
+ <dim>-1</dim>
276
+ </port>
277
+ <port id="2" precision="U8">
278
+ <dim>-1</dim>
279
+ </port>
280
+ <port id="3" precision="U8">
281
+ <dim>2</dim>
282
+ </port>
283
+ <port id="4" precision="U8">
284
+ <dim>0</dim>
285
+ </port>
286
+ </input>
287
+ <output>
288
+ <port id="5" precision="I32">
289
+ <dim>-1</dim>
290
+ </port>
291
+ <port id="6" precision="I32">
292
+ <dim>-1</dim>
293
+ </port>
294
+ <port id="7" precision="U8">
295
+ <dim>-1</dim>
296
+ </port>
297
+ </output>
298
+ </layer>
299
+ <layer id="19" name="UTF8Validate_142" type="UTF8Validate" version="extension">
300
+ <data replace_mode="true" />
301
+ <input>
302
+ <port id="0" precision="I32">
303
+ <dim>-1</dim>
304
+ </port>
305
+ <port id="1" precision="I32">
306
+ <dim>-1</dim>
307
+ </port>
308
+ <port id="2" precision="U8">
309
+ <dim>-1</dim>
310
+ </port>
311
+ </input>
312
+ <output>
313
+ <port id="3" precision="I32">
314
+ <dim>-1</dim>
315
+ </port>
316
+ <port id="4" precision="I32">
317
+ <dim>-1</dim>
318
+ </port>
319
+ <port id="5" precision="U8">
320
+ <dim>-1</dim>
321
+ </port>
322
+ </output>
323
+ </layer>
324
+ <layer id="20" name="StringTensorPack_143" type="StringTensorPack" version="extension">
325
+ <data mode="begins_ends" />
326
+ <input>
327
+ <port id="0" precision="I32">
328
+ <dim>-1</dim>
329
+ </port>
330
+ <port id="1" precision="I32">
331
+ <dim>-1</dim>
332
+ </port>
333
+ <port id="2" precision="U8">
334
+ <dim>-1</dim>
335
+ </port>
336
+ </input>
337
+ <output>
338
+ <port id="3" precision="STRING" names="string_output">
339
+ <dim>-1</dim>
340
+ </port>
341
+ </output>
342
+ </layer>
343
+ <layer id="21" name="Result_144" type="Result" version="opset1">
344
+ <input>
345
+ <port id="0" precision="STRING">
346
+ <dim>-1</dim>
347
+ </port>
348
+ </input>
349
+ </layer>
350
+ </layers>
351
+ <edges>
352
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
353
+ <edge from-layer="1" from-port="1" to-layer="10" to-port="0" />
354
+ <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
355
+ <edge from-layer="3" from-port="3" to-layer="10" to-port="3" />
356
+ <edge from-layer="3" from-port="2" to-layer="10" to-port="2" />
357
+ <edge from-layer="3" from-port="1" to-layer="10" to-port="1" />
358
+ <edge from-layer="4" from-port="0" to-layer="9" to-port="0" />
359
+ <edge from-layer="5" from-port="0" to-layer="9" to-port="1" />
360
+ <edge from-layer="6" from-port="0" to-layer="9" to-port="2" />
361
+ <edge from-layer="7" from-port="0" to-layer="9" to-port="3" />
362
+ <edge from-layer="8" from-port="0" to-layer="9" to-port="4" />
363
+ <edge from-layer="9" from-port="5" to-layer="10" to-port="4" />
364
+ <edge from-layer="10" from-port="7" to-layer="13" to-port="0" />
365
+ <edge from-layer="10" from-port="8" to-layer="13" to-port="1" />
366
+ <edge from-layer="10" from-port="9" to-layer="13" to-port="2" />
367
+ <edge from-layer="10" from-port="6" to-layer="15" to-port="1" />
368
+ <edge from-layer="10" from-port="5" to-layer="15" to-port="0" />
369
+ <edge from-layer="11" from-port="0" to-layer="13" to-port="3" />
370
+ <edge from-layer="12" from-port="0" to-layer="13" to-port="4" />
371
+ <edge from-layer="13" from-port="6" to-layer="14" to-port="1" />
372
+ <edge from-layer="13" from-port="7" to-layer="14" to-port="2" />
373
+ <edge from-layer="13" from-port="5" to-layer="14" to-port="0" />
374
+ <edge from-layer="14" from-port="3" to-layer="15" to-port="2" />
375
+ <edge from-layer="14" from-port="4" to-layer="15" to-port="3" />
376
+ <edge from-layer="14" from-port="5" to-layer="18" to-port="2" />
377
+ <edge from-layer="15" from-port="4" to-layer="18" to-port="0" />
378
+ <edge from-layer="15" from-port="5" to-layer="18" to-port="1" />
379
+ <edge from-layer="16" from-port="0" to-layer="18" to-port="3" />
380
+ <edge from-layer="17" from-port="0" to-layer="18" to-port="4" />
381
+ <edge from-layer="18" from-port="5" to-layer="19" to-port="0" />
382
+ <edge from-layer="18" from-port="6" to-layer="19" to-port="1" />
383
+ <edge from-layer="18" from-port="7" to-layer="19" to-port="2" />
384
+ <edge from-layer="19" from-port="3" to-layer="20" to-port="0" />
385
+ <edge from-layer="19" from-port="4" to-layer="20" to-port="1" />
386
+ <edge from-layer="19" from-port="5" to-layer="20" to-port="2" />
387
+ <edge from-layer="20" from-port="3" to-layer="21" to-port="0" />
388
+ </edges>
389
+ <rt_info>
390
+ <add_attention_mask value="True" />
391
+ <add_prefix_space />
392
+ <add_special_tokens value="True" />
393
+ <bos_token_id value="1" />
394
+ <chat_template value="{% for message in messages %}&#10;{% if message['role'] == 'user' %}&#10;{{ '&lt;|user|>&#10;' + message['content'] + eos_token }}&#10;{% elif message['role'] == 'system' %}&#10;{{ '&lt;|system|>&#10;' + message['content'] + eos_token }}&#10;{% elif message['role'] == 'assistant' %}&#10;{{ '&lt;|assistant|>&#10;' + message['content'] + eos_token }}&#10;{% endif %}&#10;{% if loop.last and add_generation_prompt %}&#10;{{ '&lt;|assistant|>' }}&#10;{% endif %}&#10;{% endfor %}" />
395
+ <clean_up_tokenization_spaces />
396
+ <detokenizer_input_type value="i64" />
397
+ <eos_token_id value="2" />
398
+ <handle_special_tokens_with_re value="False" />
399
+ <number_of_inputs value="1" />
400
+ <openvino_tokenizers_version value="2025.0.0.0" />
401
+ <openvino_version value="2025.0.0" />
402
+ <original_tokenizer_class value="&lt;class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>" />
403
+ <pad_token_id value="2" />
404
+ <sentencepiece_version value="0.2.0" />
405
+ <skip_special_tokens value="True" />
406
+ <streaming_detokenizer value="False" />
407
+ <tiktoken_version value="0.8.0" />
408
+ <tokenizer_output_type value="i64" />
409
+ <tokenizers_version value="0.20.3" />
410
+ <transformers_version value="4.46.3" />
411
+ <use_max_padding value="False" />
412
+ <use_sentencepiece_backend value="True" />
413
+ <utf8_replace_mode value="replace" />
414
+ <with_detokenizer value="True" />
415
+ </rt_info>
416
+ </net>