kraalfar committed
Commit 4b673f2 · verified · 1 Parent(s): 7e6e533

Delete tokenizer_config.json

Files changed (1)
tokenizer_config.json +0 -234
tokenizer_config.json DELETED
@@ -1,234 +0,0 @@
- {
-   "add_bos_token": true,
-   "add_eos_token": true,
-   "add_prefix_space": null,
-   "added_tokens_decoder": {
-     "32000": {
-       "content": "õ",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32001": {
-       "content": "÷",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32002": {
-       "content": "Á",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32003": {
-       "content": "ý",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32004": {
-       "content": "À",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32005": {
-       "content": "ÿ",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32006": {
-       "content": "ø",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32007": {
-       "content": "ú",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32008": {
-       "content": "þ",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32009": {
-       "content": "ü",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32010": {
-       "content": "ù",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32011": {
-       "content": "ö",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32012": {
-       "content": "û",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32013": {
-       "content": "<|begin▁of▁sentence|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "32014": {
-       "content": "<|end▁of▁sentence|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "32015": {
-       "content": "<|fim▁hole|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32016": {
-       "content": "<|fim▁begin|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32017": {
-       "content": "<|fim▁end|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32018": {
-       "content": "<pad>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32019": {
-       "content": "<|User|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32020": {
-       "content": "<|Assistant|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32021": {
-       "content": "<|EOT|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "32022": {
-       "content": "<|CLS|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32023": {
-       "content": "<|ADD_AFTER|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32024": {
-       "content": "<|ADD_START|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32025": {
-       "content": "<|ADD_END|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "32026": {
-       "content": "<|REMOVE|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     }
-   },
-   "bos_token": "<|begin▁of▁sentence|>",
-   "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
-   "clean_up_tokenization_spaces": false,
-   "eos_token": "<|EOT|>",
-   "legacy": true,
-   "model_max_length": 16384,
-   "pad_token": "<|end▁of▁sentence|>",
-   "sp_model_kwargs": {},
-   "tokenizer_class": "LlamaTokenizer",
-   "unk_token": null,
-   "use_default_system_prompt": false
- }
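
Context: the deleted file configured a LlamaTokenizer with DeepSeek-Coder-style special tokens (BOS/EOS sentence markers, fill-in-the-middle markers, edit markers such as <|ADD_AFTER|>) and an Alpaca-style chat template. As a minimal sketch (not part of this commit), the removed template can still be exercised by loading the tokenizer at the parent revision 7e6e533, where tokenizer_config.json still exists; the repository id below is a placeholder for the repo this commit belongs to.

# Sketch only: "your-org/your-repo" is a placeholder, not a real repo id.
# Loading at revision 7e6e533 (the parent commit) picks up the config
# before this deletion.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/your-repo", revision="7e6e533")

messages = [
    {"role": "user", "content": "Write a function that reverses a string."},
]

# tokenize=False returns the rendered prompt text; add_generation_prompt=True
# appends the trailing "### Response:" header so the model starts answering.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)

Rendered without a system message, this template emits the BOS token, the default Deepseek Coder system prompt, the user turn under "### Instruction:", and the trailing "### Response:" header. The <|fim▁begin|>/<|fim▁hole|>/<|fim▁end|> tokens mirror the upstream DeepSeek Coder infill layout, where a prompt of the form <|fim▁begin|>prefix<|fim▁hole|>suffix<|fim▁end|> asks the model to fill the gap; whether this repository follows that same convention is an assumption.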