aalst committed on
Commit
14a9aef
·
1 Parent(s): 0dded2d

Revert "Upload tokenizer"

Browse files

This reverts commit 0dded2d4d7f80e337090dcfcb786042c2de0c8f2.

Files changed (5) hide show
  1. merges.txt +0 -0
  2. special_tokens_map.json +0 -63
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +0 -356
  5. vocab.json +0 -0
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json DELETED
@@ -1,63 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|endoftext|>",
4
- "<fim_prefix>",
5
- "<fim_middle>",
6
- "<fim_suffix>",
7
- "<fim_pad>",
8
- "<repo_name>",
9
- "<file_sep>",
10
- "<issue_start>",
11
- "<issue_comment>",
12
- "<issue_closed>",
13
- "<jupyter_start>",
14
- "<jupyter_text>",
15
- "<jupyter_code>",
16
- "<jupyter_output>",
17
- "<jupyter_script>",
18
- "<empty_output>",
19
- "<code_to_intermediate>",
20
- "<intermediate_to_code>",
21
- "<pr>",
22
- "<pr_status>",
23
- "<pr_is_merged>",
24
- "<pr_base>",
25
- "<pr_file>",
26
- "<pr_base_code>",
27
- "<pr_diff>",
28
- "<pr_diff_hunk>",
29
- "<pr_comment>",
30
- "<pr_event_id>",
31
- "<pr_review>",
32
- "<pr_review_state>",
33
- "<pr_review_comment>",
34
- "<pr_in_reply_to_review_id>",
35
- "<pr_in_reply_to_comment_id>",
36
- "<pr_diff_hunk_comment_line>",
37
- "<NAME>",
38
- "<EMAIL>",
39
- "<KEY>",
40
- "<PASSWORD>"
41
- ],
42
- "bos_token": {
43
- "content": "<|endoftext|>",
44
- "lstrip": false,
45
- "normalized": false,
46
- "rstrip": false,
47
- "single_word": false
48
- },
49
- "eos_token": {
50
- "content": "<|endoftext|>",
51
- "lstrip": false,
52
- "normalized": false,
53
- "rstrip": false,
54
- "single_word": false
55
- },
56
- "unk_token": {
57
- "content": "<|endoftext|>",
58
- "lstrip": false,
59
- "normalized": false,
60
- "rstrip": false,
61
- "single_word": false
62
- }
63
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,356 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|endoftext|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<fim_prefix>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "<fim_middle>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<fim_suffix>",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "4": {
37
- "content": "<fim_pad>",
38
- "lstrip": false,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- },
44
- "5": {
45
- "content": "<repo_name>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": true
51
- },
52
- "6": {
53
- "content": "<file_sep>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": true
59
- },
60
- "7": {
61
- "content": "<issue_start>",
62
- "lstrip": false,
63
- "normalized": false,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": true
67
- },
68
- "8": {
69
- "content": "<issue_comment>",
70
- "lstrip": false,
71
- "normalized": false,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": true
75
- },
76
- "9": {
77
- "content": "<issue_closed>",
78
- "lstrip": false,
79
- "normalized": false,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": true
83
- },
84
- "10": {
85
- "content": "<jupyter_start>",
86
- "lstrip": false,
87
- "normalized": false,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": true
91
- },
92
- "11": {
93
- "content": "<jupyter_text>",
94
- "lstrip": false,
95
- "normalized": false,
96
- "rstrip": false,
97
- "single_word": false,
98
- "special": true
99
- },
100
- "12": {
101
- "content": "<jupyter_code>",
102
- "lstrip": false,
103
- "normalized": false,
104
- "rstrip": false,
105
- "single_word": false,
106
- "special": true
107
- },
108
- "13": {
109
- "content": "<jupyter_output>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false,
114
- "special": true
115
- },
116
- "14": {
117
- "content": "<jupyter_script>",
118
- "lstrip": false,
119
- "normalized": false,
120
- "rstrip": false,
121
- "single_word": false,
122
- "special": true
123
- },
124
- "15": {
125
- "content": "<empty_output>",
126
- "lstrip": false,
127
- "normalized": false,
128
- "rstrip": false,
129
- "single_word": false,
130
- "special": true
131
- },
132
- "16": {
133
- "content": "<code_to_intermediate>",
134
- "lstrip": false,
135
- "normalized": false,
136
- "rstrip": false,
137
- "single_word": false,
138
- "special": true
139
- },
140
- "17": {
141
- "content": "<intermediate_to_code>",
142
- "lstrip": false,
143
- "normalized": false,
144
- "rstrip": false,
145
- "single_word": false,
146
- "special": true
147
- },
148
- "18": {
149
- "content": "<pr>",
150
- "lstrip": false,
151
- "normalized": false,
152
- "rstrip": false,
153
- "single_word": false,
154
- "special": true
155
- },
156
- "19": {
157
- "content": "<pr_status>",
158
- "lstrip": false,
159
- "normalized": false,
160
- "rstrip": false,
161
- "single_word": false,
162
- "special": true
163
- },
164
- "20": {
165
- "content": "<pr_is_merged>",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false,
170
- "special": true
171
- },
172
- "21": {
173
- "content": "<pr_base>",
174
- "lstrip": false,
175
- "normalized": false,
176
- "rstrip": false,
177
- "single_word": false,
178
- "special": true
179
- },
180
- "22": {
181
- "content": "<pr_file>",
182
- "lstrip": false,
183
- "normalized": false,
184
- "rstrip": false,
185
- "single_word": false,
186
- "special": true
187
- },
188
- "23": {
189
- "content": "<pr_base_code>",
190
- "lstrip": false,
191
- "normalized": false,
192
- "rstrip": false,
193
- "single_word": false,
194
- "special": true
195
- },
196
- "24": {
197
- "content": "<pr_diff>",
198
- "lstrip": false,
199
- "normalized": false,
200
- "rstrip": false,
201
- "single_word": false,
202
- "special": true
203
- },
204
- "25": {
205
- "content": "<pr_diff_hunk>",
206
- "lstrip": false,
207
- "normalized": false,
208
- "rstrip": false,
209
- "single_word": false,
210
- "special": true
211
- },
212
- "26": {
213
- "content": "<pr_comment>",
214
- "lstrip": false,
215
- "normalized": false,
216
- "rstrip": false,
217
- "single_word": false,
218
- "special": true
219
- },
220
- "27": {
221
- "content": "<pr_event_id>",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false,
226
- "special": true
227
- },
228
- "28": {
229
- "content": "<pr_review>",
230
- "lstrip": false,
231
- "normalized": false,
232
- "rstrip": false,
233
- "single_word": false,
234
- "special": true
235
- },
236
- "29": {
237
- "content": "<pr_review_state>",
238
- "lstrip": false,
239
- "normalized": false,
240
- "rstrip": false,
241
- "single_word": false,
242
- "special": true
243
- },
244
- "30": {
245
- "content": "<pr_review_comment>",
246
- "lstrip": false,
247
- "normalized": false,
248
- "rstrip": false,
249
- "single_word": false,
250
- "special": true
251
- },
252
- "31": {
253
- "content": "<pr_in_reply_to_review_id>",
254
- "lstrip": false,
255
- "normalized": false,
256
- "rstrip": false,
257
- "single_word": false,
258
- "special": true
259
- },
260
- "32": {
261
- "content": "<pr_in_reply_to_comment_id>",
262
- "lstrip": false,
263
- "normalized": false,
264
- "rstrip": false,
265
- "single_word": false,
266
- "special": true
267
- },
268
- "33": {
269
- "content": "<pr_diff_hunk_comment_line>",
270
- "lstrip": false,
271
- "normalized": false,
272
- "rstrip": false,
273
- "single_word": false,
274
- "special": true
275
- },
276
- "34": {
277
- "content": "<NAME>",
278
- "lstrip": false,
279
- "normalized": false,
280
- "rstrip": false,
281
- "single_word": false,
282
- "special": true
283
- },
284
- "35": {
285
- "content": "<EMAIL>",
286
- "lstrip": false,
287
- "normalized": false,
288
- "rstrip": false,
289
- "single_word": false,
290
- "special": true
291
- },
292
- "36": {
293
- "content": "<KEY>",
294
- "lstrip": false,
295
- "normalized": false,
296
- "rstrip": false,
297
- "single_word": false,
298
- "special": true
299
- },
300
- "37": {
301
- "content": "<PASSWORD>",
302
- "lstrip": false,
303
- "normalized": false,
304
- "rstrip": false,
305
- "single_word": false,
306
- "special": true
307
- }
308
- },
309
- "additional_special_tokens": [
310
- "<|endoftext|>",
311
- "<fim_prefix>",
312
- "<fim_middle>",
313
- "<fim_suffix>",
314
- "<fim_pad>",
315
- "<repo_name>",
316
- "<file_sep>",
317
- "<issue_start>",
318
- "<issue_comment>",
319
- "<issue_closed>",
320
- "<jupyter_start>",
321
- "<jupyter_text>",
322
- "<jupyter_code>",
323
- "<jupyter_output>",
324
- "<jupyter_script>",
325
- "<empty_output>",
326
- "<code_to_intermediate>",
327
- "<intermediate_to_code>",
328
- "<pr>",
329
- "<pr_status>",
330
- "<pr_is_merged>",
331
- "<pr_base>",
332
- "<pr_file>",
333
- "<pr_base_code>",
334
- "<pr_diff>",
335
- "<pr_diff_hunk>",
336
- "<pr_comment>",
337
- "<pr_event_id>",
338
- "<pr_review>",
339
- "<pr_review_state>",
340
- "<pr_review_comment>",
341
- "<pr_in_reply_to_review_id>",
342
- "<pr_in_reply_to_comment_id>",
343
- "<pr_diff_hunk_comment_line>",
344
- "<NAME>",
345
- "<EMAIL>",
346
- "<KEY>",
347
- "<PASSWORD>"
348
- ],
349
- "bos_token": "<|endoftext|>",
350
- "clean_up_tokenization_spaces": true,
351
- "eos_token": "<|endoftext|>",
352
- "model_max_length": 1000000000000000019884624838656,
353
- "tokenizer_class": "GPT2Tokenizer",
354
- "unk_token": "<|endoftext|>",
355
- "vocab_size": 49152
356
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff