CeciGonSer committed on
Commit
7aa2a85
·
verified ·
1 Parent(s): b7d904e

Training in progress, step 500

Browse files
config.json CHANGED
@@ -1,37 +1,58 @@
1
  {
2
- "_name_or_path": "facebook/m2m100_418M",
3
- "activation_dropout": 0.0,
 
4
  "activation_function": "relu",
 
 
5
  "architectures": [
6
- "M2M100ForConditionalGeneration"
7
  ],
8
- "attention_dropout": 0.1,
9
  "bos_token_id": 0,
 
 
10
  "d_model": 1024,
11
  "decoder_attention_heads": 16,
12
  "decoder_ffn_dim": 4096,
13
- "decoder_layerdrop": 0.05,
14
  "decoder_layers": 12,
15
  "decoder_start_token_id": 2,
16
  "dropout": 0.1,
17
  "early_stopping": true,
18
  "encoder_attention_heads": 16,
19
  "encoder_ffn_dim": 4096,
20
- "encoder_layerdrop": 0.05,
21
  "encoder_layers": 12,
22
  "eos_token_id": 2,
 
23
  "gradient_checkpointing": false,
 
 
 
 
 
24
  "init_std": 0.02,
25
  "is_encoder_decoder": true,
 
 
 
 
 
26
  "max_length": 200,
27
  "max_position_embeddings": 1024,
28
- "model_type": "m2m_100",
 
 
29
  "num_beams": 5,
30
  "num_hidden_layers": 12,
 
31
  "pad_token_id": 1,
32
  "scale_embedding": true,
 
 
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.40.0",
35
  "use_cache": true,
36
- "vocab_size": 128112
37
  }
 
1
  {
2
+ "_name_or_path": "facebook/mbart-large-50-many-to-many-mmt",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.3,
5
  "activation_function": "relu",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": true,
8
  "architectures": [
9
+ "MBartForConditionalGeneration"
10
  ],
11
+ "attention_dropout": 0.3,
12
  "bos_token_id": 0,
13
+ "classif_dropout": 0.0,
14
+ "classifier_dropout": 0.3,
15
  "d_model": 1024,
16
  "decoder_attention_heads": 16,
17
  "decoder_ffn_dim": 4096,
18
+ "decoder_layerdrop": 0.0,
19
  "decoder_layers": 12,
20
  "decoder_start_token_id": 2,
21
  "dropout": 0.1,
22
  "early_stopping": true,
23
  "encoder_attention_heads": 16,
24
  "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
  "encoder_layers": 12,
27
  "eos_token_id": 2,
28
+ "forced_eos_token_id": 2,
29
  "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
  "init_std": 0.02,
36
  "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
  "max_length": 200,
43
  "max_position_embeddings": 1024,
44
+ "model_type": "mbart",
45
+ "normalize_before": true,
46
+ "normalize_embedding": true,
47
  "num_beams": 5,
48
  "num_hidden_layers": 12,
49
+ "output_past": true,
50
  "pad_token_id": 1,
51
  "scale_embedding": true,
52
+ "static_position_embeddings": false,
53
+ "tokenizer_class": "MBart50Tokenizer",
54
  "torch_dtype": "float32",
55
  "transformers_version": "4.40.0",
56
  "use_cache": true,
57
+ "vocab_size": 250054
58
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b343d338942fe5ecf7a1720ceb10c1d1bf908b0396b2c1e60c315fb95db5c1ec
3
- size 1935681888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40b584171878d0d6c3f7d814757306cdcd674240ba4720ee5706aab105a6a309
3
+ size 2444578688
sentencepiece.bpe.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a
3
- size 2423393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json CHANGED
@@ -1,108 +1,68 @@
1
  {
2
  "additional_special_tokens": [
3
- "__af__",
4
- "__am__",
5
- "__ar__",
6
- "__ast__",
7
- "__az__",
8
- "__ba__",
9
- "__be__",
10
- "__bg__",
11
- "__bn__",
12
- "__br__",
13
- "__bs__",
14
- "__ca__",
15
- "__ceb__",
16
- "__cs__",
17
- "__cy__",
18
- "__da__",
19
- "__de__",
20
- "__el__",
21
- "__en__",
22
- "__es__",
23
- "__et__",
24
- "__fa__",
25
- "__ff__",
26
- "__fi__",
27
- "__fr__",
28
- "__fy__",
29
- "__ga__",
30
- "__gd__",
31
- "__gl__",
32
- "__gu__",
33
- "__ha__",
34
- "__he__",
35
- "__hi__",
36
- "__hr__",
37
- "__ht__",
38
- "__hu__",
39
- "__hy__",
40
- "__id__",
41
- "__ig__",
42
- "__ilo__",
43
- "__is__",
44
- "__it__",
45
- "__ja__",
46
- "__jv__",
47
- "__ka__",
48
- "__kk__",
49
- "__km__",
50
- "__kn__",
51
- "__ko__",
52
- "__lb__",
53
- "__lg__",
54
- "__ln__",
55
- "__lo__",
56
- "__lt__",
57
- "__lv__",
58
- "__mg__",
59
- "__mk__",
60
- "__ml__",
61
- "__mn__",
62
- "__mr__",
63
- "__ms__",
64
- "__my__",
65
- "__ne__",
66
- "__nl__",
67
- "__no__",
68
- "__ns__",
69
- "__oc__",
70
- "__or__",
71
- "__pa__",
72
- "__pl__",
73
- "__ps__",
74
- "__pt__",
75
- "__ro__",
76
- "__ru__",
77
- "__sd__",
78
- "__si__",
79
- "__sk__",
80
- "__sl__",
81
- "__so__",
82
- "__sq__",
83
- "__sr__",
84
- "__ss__",
85
- "__su__",
86
- "__sv__",
87
- "__sw__",
88
- "__ta__",
89
- "__th__",
90
- "__tl__",
91
- "__tn__",
92
- "__tr__",
93
- "__uk__",
94
- "__ur__",
95
- "__uz__",
96
- "__vi__",
97
- "__wo__",
98
- "__xh__",
99
- "__yi__",
100
- "__yo__",
101
- "__zh__",
102
- "__zu__"
103
  ],
104
  "bos_token": "<s>",
 
105
  "eos_token": "</s>",
 
 
 
 
 
 
 
106
  "pad_token": "<pad>",
107
  "sep_token": "</s>",
108
  "unk_token": "<unk>"
 
1
  {
2
  "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ],
56
  "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
  "eos_token": "</s>",
59
+ "mask_token": {
60
+ "content": "<mask>",
61
+ "lstrip": true,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
  "pad_token": "<pad>",
67
  "sep_token": "</s>",
68
  "unk_token": "<unk>"
tokenizer_config.json CHANGED
@@ -32,920 +32,497 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "128004": {
36
- "content": "__af__",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  },
43
- "128005": {
44
- "content": "__am__",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
  },
51
- "128006": {
52
- "content": "__ar__",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
56
  "single_word": false,
57
  "special": true
58
  },
59
- "128007": {
60
- "content": "__ast__",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
66
  },
67
- "128008": {
68
- "content": "__az__",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
72
  "single_word": false,
73
  "special": true
74
  },
75
- "128009": {
76
- "content": "__ba__",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
80
  "single_word": false,
81
  "special": true
82
  },
83
- "128010": {
84
- "content": "__be__",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
88
  "single_word": false,
89
  "special": true
90
  },
91
- "128011": {
92
- "content": "__bg__",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
96
  "single_word": false,
97
  "special": true
98
  },
99
- "128012": {
100
- "content": "__bn__",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
104
  "single_word": false,
105
  "special": true
106
  },
107
- "128013": {
108
- "content": "__br__",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
112
  "single_word": false,
113
  "special": true
114
  },
115
- "128014": {
116
- "content": "__bs__",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false,
121
  "special": true
122
  },
123
- "128015": {
124
- "content": "__ca__",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
128
  "single_word": false,
129
  "special": true
130
  },
131
- "128016": {
132
- "content": "__ceb__",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
136
  "single_word": false,
137
  "special": true
138
  },
139
- "128017": {
140
- "content": "__cs__",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
144
  "single_word": false,
145
  "special": true
146
  },
147
- "128018": {
148
- "content": "__cy__",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
152
  "single_word": false,
153
  "special": true
154
  },
155
- "128019": {
156
- "content": "__da__",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
160
  "single_word": false,
161
  "special": true
162
  },
163
- "128020": {
164
- "content": "__de__",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
168
  "single_word": false,
169
  "special": true
170
  },
171
- "128021": {
172
- "content": "__el__",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
176
  "single_word": false,
177
  "special": true
178
  },
179
- "128022": {
180
- "content": "__en__",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
184
  "single_word": false,
185
  "special": true
186
  },
187
- "128023": {
188
- "content": "__es__",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
192
  "single_word": false,
193
  "special": true
194
  },
195
- "128024": {
196
- "content": "__et__",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
200
  "single_word": false,
201
  "special": true
202
  },
203
- "128025": {
204
- "content": "__fa__",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
208
  "single_word": false,
209
  "special": true
210
  },
211
- "128026": {
212
- "content": "__ff__",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
216
  "single_word": false,
217
  "special": true
218
  },
219
- "128027": {
220
- "content": "__fi__",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
224
  "single_word": false,
225
  "special": true
226
  },
227
- "128028": {
228
- "content": "__fr__",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
232
  "single_word": false,
233
  "special": true
234
  },
235
- "128029": {
236
- "content": "__fy__",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
240
  "single_word": false,
241
  "special": true
242
  },
243
- "128030": {
244
- "content": "__ga__",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
248
  "single_word": false,
249
  "special": true
250
  },
251
- "128031": {
252
- "content": "__gd__",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
256
  "single_word": false,
257
  "special": true
258
  },
259
- "128032": {
260
- "content": "__gl__",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
264
  "single_word": false,
265
  "special": true
266
  },
267
- "128033": {
268
- "content": "__gu__",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
272
  "single_word": false,
273
  "special": true
274
  },
275
- "128034": {
276
- "content": "__ha__",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
280
  "single_word": false,
281
  "special": true
282
  },
283
- "128035": {
284
- "content": "__he__",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
288
  "single_word": false,
289
  "special": true
290
  },
291
- "128036": {
292
- "content": "__hi__",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
296
  "single_word": false,
297
  "special": true
298
  },
299
- "128037": {
300
- "content": "__hr__",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
304
  "single_word": false,
305
  "special": true
306
  },
307
- "128038": {
308
- "content": "__ht__",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
312
  "single_word": false,
313
  "special": true
314
  },
315
- "128039": {
316
- "content": "__hu__",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
320
  "single_word": false,
321
  "special": true
322
  },
323
- "128040": {
324
- "content": "__hy__",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
328
  "single_word": false,
329
  "special": true
330
  },
331
- "128041": {
332
- "content": "__id__",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
336
  "single_word": false,
337
  "special": true
338
  },
339
- "128042": {
340
- "content": "__ig__",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
344
  "single_word": false,
345
  "special": true
346
  },
347
- "128043": {
348
- "content": "__ilo__",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
352
  "single_word": false,
353
  "special": true
354
  },
355
- "128044": {
356
- "content": "__is__",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
360
  "single_word": false,
361
  "special": true
362
  },
363
- "128045": {
364
- "content": "__it__",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
368
  "single_word": false,
369
  "special": true
370
  },
371
- "128046": {
372
- "content": "__ja__",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
376
  "single_word": false,
377
  "special": true
378
  },
379
- "128047": {
380
- "content": "__jv__",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
384
  "single_word": false,
385
  "special": true
386
  },
387
- "128048": {
388
- "content": "__ka__",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
392
  "single_word": false,
393
  "special": true
394
  },
395
- "128049": {
396
- "content": "__kk__",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
400
  "single_word": false,
401
  "special": true
402
  },
403
- "128050": {
404
- "content": "__km__",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
408
  "single_word": false,
409
  "special": true
410
  },
411
- "128051": {
412
- "content": "__kn__",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
416
  "single_word": false,
417
  "special": true
418
  },
419
- "128052": {
420
- "content": "__ko__",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
424
  "single_word": false,
425
  "special": true
426
  },
427
- "128053": {
428
- "content": "__lb__",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
432
  "single_word": false,
433
  "special": true
434
  },
435
- "128054": {
436
- "content": "__lg__",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
440
  "single_word": false,
441
  "special": true
442
  },
443
- "128055": {
444
- "content": "__ln__",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
448
  "single_word": false,
449
  "special": true
450
  },
451
- "128056": {
452
- "content": "__lo__",
453
- "lstrip": false,
454
- "normalized": false,
455
- "rstrip": false,
456
- "single_word": false,
457
- "special": true
458
- },
459
- "128057": {
460
- "content": "__lt__",
461
- "lstrip": false,
462
- "normalized": false,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": true
466
- },
467
- "128058": {
468
- "content": "__lv__",
469
- "lstrip": false,
470
- "normalized": false,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": true
474
- },
475
- "128059": {
476
- "content": "__mg__",
477
- "lstrip": false,
478
- "normalized": false,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": true
482
- },
483
- "128060": {
484
- "content": "__mk__",
485
- "lstrip": false,
486
- "normalized": false,
487
- "rstrip": false,
488
- "single_word": false,
489
- "special": true
490
- },
491
- "128061": {
492
- "content": "__ml__",
493
- "lstrip": false,
494
- "normalized": false,
495
- "rstrip": false,
496
- "single_word": false,
497
- "special": true
498
- },
499
- "128062": {
500
- "content": "__mn__",
501
- "lstrip": false,
502
- "normalized": false,
503
- "rstrip": false,
504
- "single_word": false,
505
- "special": true
506
- },
507
- "128063": {
508
- "content": "__mr__",
509
- "lstrip": false,
510
- "normalized": false,
511
- "rstrip": false,
512
- "single_word": false,
513
- "special": true
514
- },
515
- "128064": {
516
- "content": "__ms__",
517
- "lstrip": false,
518
- "normalized": false,
519
- "rstrip": false,
520
- "single_word": false,
521
- "special": true
522
- },
523
- "128065": {
524
- "content": "__my__",
525
- "lstrip": false,
526
- "normalized": false,
527
- "rstrip": false,
528
- "single_word": false,
529
- "special": true
530
- },
531
- "128066": {
532
- "content": "__ne__",
533
- "lstrip": false,
534
- "normalized": false,
535
- "rstrip": false,
536
- "single_word": false,
537
- "special": true
538
- },
539
- "128067": {
540
- "content": "__nl__",
541
- "lstrip": false,
542
- "normalized": false,
543
- "rstrip": false,
544
- "single_word": false,
545
- "special": true
546
- },
547
- "128068": {
548
- "content": "__no__",
549
- "lstrip": false,
550
- "normalized": false,
551
- "rstrip": false,
552
- "single_word": false,
553
- "special": true
554
- },
555
- "128069": {
556
- "content": "__ns__",
557
- "lstrip": false,
558
- "normalized": false,
559
- "rstrip": false,
560
- "single_word": false,
561
- "special": true
562
- },
563
- "128070": {
564
- "content": "__oc__",
565
- "lstrip": false,
566
- "normalized": false,
567
- "rstrip": false,
568
- "single_word": false,
569
- "special": true
570
- },
571
- "128071": {
572
- "content": "__or__",
573
- "lstrip": false,
574
- "normalized": false,
575
- "rstrip": false,
576
- "single_word": false,
577
- "special": true
578
- },
579
- "128072": {
580
- "content": "__pa__",
581
- "lstrip": false,
582
- "normalized": false,
583
- "rstrip": false,
584
- "single_word": false,
585
- "special": true
586
- },
587
- "128073": {
588
- "content": "__pl__",
589
- "lstrip": false,
590
- "normalized": false,
591
- "rstrip": false,
592
- "single_word": false,
593
- "special": true
594
- },
595
- "128074": {
596
- "content": "__ps__",
597
- "lstrip": false,
598
- "normalized": false,
599
- "rstrip": false,
600
- "single_word": false,
601
- "special": true
602
- },
603
- "128075": {
604
- "content": "__pt__",
605
- "lstrip": false,
606
- "normalized": false,
607
- "rstrip": false,
608
- "single_word": false,
609
- "special": true
610
- },
611
- "128076": {
612
- "content": "__ro__",
613
- "lstrip": false,
614
- "normalized": false,
615
- "rstrip": false,
616
- "single_word": false,
617
- "special": true
618
- },
619
- "128077": {
620
- "content": "__ru__",
621
- "lstrip": false,
622
- "normalized": false,
623
- "rstrip": false,
624
- "single_word": false,
625
- "special": true
626
- },
627
- "128078": {
628
- "content": "__sd__",
629
- "lstrip": false,
630
- "normalized": false,
631
- "rstrip": false,
632
- "single_word": false,
633
- "special": true
634
- },
635
- "128079": {
636
- "content": "__si__",
637
- "lstrip": false,
638
- "normalized": false,
639
- "rstrip": false,
640
- "single_word": false,
641
- "special": true
642
- },
643
- "128080": {
644
- "content": "__sk__",
645
- "lstrip": false,
646
- "normalized": false,
647
- "rstrip": false,
648
- "single_word": false,
649
- "special": true
650
- },
651
- "128081": {
652
- "content": "__sl__",
653
- "lstrip": false,
654
- "normalized": false,
655
- "rstrip": false,
656
- "single_word": false,
657
- "special": true
658
- },
659
- "128082": {
660
- "content": "__so__",
661
- "lstrip": false,
662
- "normalized": false,
663
- "rstrip": false,
664
- "single_word": false,
665
- "special": true
666
- },
667
- "128083": {
668
- "content": "__sq__",
669
- "lstrip": false,
670
- "normalized": false,
671
- "rstrip": false,
672
- "single_word": false,
673
- "special": true
674
- },
675
- "128084": {
676
- "content": "__sr__",
677
- "lstrip": false,
678
- "normalized": false,
679
- "rstrip": false,
680
- "single_word": false,
681
- "special": true
682
- },
683
- "128085": {
684
- "content": "__ss__",
685
- "lstrip": false,
686
- "normalized": false,
687
- "rstrip": false,
688
- "single_word": false,
689
- "special": true
690
- },
691
- "128086": {
692
- "content": "__su__",
693
- "lstrip": false,
694
- "normalized": false,
695
- "rstrip": false,
696
- "single_word": false,
697
- "special": true
698
- },
699
- "128087": {
700
- "content": "__sv__",
701
- "lstrip": false,
702
- "normalized": false,
703
- "rstrip": false,
704
- "single_word": false,
705
- "special": true
706
- },
707
- "128088": {
708
- "content": "__sw__",
709
- "lstrip": false,
710
- "normalized": false,
711
- "rstrip": false,
712
- "single_word": false,
713
- "special": true
714
- },
715
- "128089": {
716
- "content": "__ta__",
717
- "lstrip": false,
718
- "normalized": false,
719
- "rstrip": false,
720
- "single_word": false,
721
- "special": true
722
- },
723
- "128090": {
724
- "content": "__th__",
725
- "lstrip": false,
726
- "normalized": false,
727
- "rstrip": false,
728
- "single_word": false,
729
- "special": true
730
- },
731
- "128091": {
732
- "content": "__tl__",
733
- "lstrip": false,
734
- "normalized": false,
735
- "rstrip": false,
736
- "single_word": false,
737
- "special": true
738
- },
739
- "128092": {
740
- "content": "__tn__",
741
- "lstrip": false,
742
- "normalized": false,
743
- "rstrip": false,
744
- "single_word": false,
745
- "special": true
746
- },
747
- "128093": {
748
- "content": "__tr__",
749
- "lstrip": false,
750
- "normalized": false,
751
- "rstrip": false,
752
- "single_word": false,
753
- "special": true
754
- },
755
- "128094": {
756
- "content": "__uk__",
757
- "lstrip": false,
758
- "normalized": false,
759
- "rstrip": false,
760
- "single_word": false,
761
- "special": true
762
- },
763
- "128095": {
764
- "content": "__ur__",
765
- "lstrip": false,
766
- "normalized": false,
767
- "rstrip": false,
768
- "single_word": false,
769
- "special": true
770
- },
771
- "128096": {
772
- "content": "__uz__",
773
- "lstrip": false,
774
- "normalized": false,
775
- "rstrip": false,
776
- "single_word": false,
777
- "special": true
778
- },
779
- "128097": {
780
- "content": "__vi__",
781
- "lstrip": false,
782
- "normalized": false,
783
- "rstrip": false,
784
- "single_word": false,
785
- "special": true
786
- },
787
- "128098": {
788
- "content": "__wo__",
789
- "lstrip": false,
790
- "normalized": false,
791
- "rstrip": false,
792
- "single_word": false,
793
- "special": true
794
- },
795
- "128099": {
796
- "content": "__xh__",
797
- "lstrip": false,
798
- "normalized": false,
799
- "rstrip": false,
800
- "single_word": false,
801
- "special": true
802
- },
803
- "128100": {
804
- "content": "__yi__",
805
- "lstrip": false,
806
- "normalized": false,
807
- "rstrip": false,
808
- "single_word": false,
809
- "special": true
810
- },
811
- "128101": {
812
- "content": "__yo__",
813
- "lstrip": false,
814
- "normalized": false,
815
- "rstrip": false,
816
- "single_word": false,
817
- "special": true
818
- },
819
- "128102": {
820
- "content": "__zh__",
821
- "lstrip": false,
822
- "normalized": false,
823
- "rstrip": false,
824
- "single_word": false,
825
- "special": true
826
- },
827
- "128103": {
828
- "content": "__zu__",
829
- "lstrip": false,
830
- "normalized": false,
831
  "rstrip": false,
832
  "single_word": false,
833
  "special": true
834
  }
835
  },
836
  "additional_special_tokens": [
837
- "__af__",
838
- "__am__",
839
- "__ar__",
840
- "__ast__",
841
- "__az__",
842
- "__ba__",
843
- "__be__",
844
- "__bg__",
845
- "__bn__",
846
- "__br__",
847
- "__bs__",
848
- "__ca__",
849
- "__ceb__",
850
- "__cs__",
851
- "__cy__",
852
- "__da__",
853
- "__de__",
854
- "__el__",
855
- "__en__",
856
- "__es__",
857
- "__et__",
858
- "__fa__",
859
- "__ff__",
860
- "__fi__",
861
- "__fr__",
862
- "__fy__",
863
- "__ga__",
864
- "__gd__",
865
- "__gl__",
866
- "__gu__",
867
- "__ha__",
868
- "__he__",
869
- "__hi__",
870
- "__hr__",
871
- "__ht__",
872
- "__hu__",
873
- "__hy__",
874
- "__id__",
875
- "__ig__",
876
- "__ilo__",
877
- "__is__",
878
- "__it__",
879
- "__ja__",
880
- "__jv__",
881
- "__ka__",
882
- "__kk__",
883
- "__km__",
884
- "__kn__",
885
- "__ko__",
886
- "__lb__",
887
- "__lg__",
888
- "__ln__",
889
- "__lo__",
890
- "__lt__",
891
- "__lv__",
892
- "__mg__",
893
- "__mk__",
894
- "__ml__",
895
- "__mn__",
896
- "__mr__",
897
- "__ms__",
898
- "__my__",
899
- "__ne__",
900
- "__nl__",
901
- "__no__",
902
- "__ns__",
903
- "__oc__",
904
- "__or__",
905
- "__pa__",
906
- "__pl__",
907
- "__ps__",
908
- "__pt__",
909
- "__ro__",
910
- "__ru__",
911
- "__sd__",
912
- "__si__",
913
- "__sk__",
914
- "__sl__",
915
- "__so__",
916
- "__sq__",
917
- "__sr__",
918
- "__ss__",
919
- "__su__",
920
- "__sv__",
921
- "__sw__",
922
- "__ta__",
923
- "__th__",
924
- "__tl__",
925
- "__tn__",
926
- "__tr__",
927
- "__uk__",
928
- "__ur__",
929
- "__uz__",
930
- "__vi__",
931
- "__wo__",
932
- "__xh__",
933
- "__yi__",
934
- "__yo__",
935
- "__zh__",
936
- "__zu__"
937
  ],
938
  "bos_token": "<s>",
939
  "clean_up_tokenization_spaces": true,
 
940
  "eos_token": "</s>",
941
- "language_codes": "m2m100",
942
- "model_max_length": 1024,
943
- "num_madeup_words": 8,
944
  "pad_token": "<pad>",
945
  "sep_token": "</s>",
946
  "sp_model_kwargs": {},
947
- "src_lang": "en",
948
  "tgt_lang": null,
949
- "tokenizer_class": "M2M100Tokenizer",
950
  "unk_token": "<unk>"
951
  }
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "250001": {
36
+ "content": "ar_AR",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "250002": {
44
+ "content": "cs_CZ",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "250003": {
52
+ "content": "de_DE",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "250004": {
60
+ "content": "en_XX",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "250005": {
68
+ "content": "es_XX",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "250006": {
76
+ "content": "et_EE",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "250007": {
84
+ "content": "fi_FI",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "250008": {
92
+ "content": "fr_XX",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "250009": {
100
+ "content": "gu_IN",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "250010": {
108
+ "content": "hi_IN",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "250011": {
116
+ "content": "it_IT",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false,
121
  "special": true
122
  },
123
+ "250012": {
124
+ "content": "ja_XX",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
128
  "single_word": false,
129
  "special": true
130
  },
131
+ "250013": {
132
+ "content": "kk_KZ",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
136
  "single_word": false,
137
  "special": true
138
  },
139
+ "250014": {
140
+ "content": "ko_KR",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
144
  "single_word": false,
145
  "special": true
146
  },
147
+ "250015": {
148
+ "content": "lt_LT",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
152
  "single_word": false,
153
  "special": true
154
  },
155
+ "250016": {
156
+ "content": "lv_LV",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
160
  "single_word": false,
161
  "special": true
162
  },
163
+ "250017": {
164
+ "content": "my_MM",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
168
  "single_word": false,
169
  "special": true
170
  },
171
+ "250018": {
172
+ "content": "ne_NP",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
176
  "single_word": false,
177
  "special": true
178
  },
179
+ "250019": {
180
+ "content": "nl_XX",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
184
  "single_word": false,
185
  "special": true
186
  },
187
+ "250020": {
188
+ "content": "ro_RO",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
192
  "single_word": false,
193
  "special": true
194
  },
195
+ "250021": {
196
+ "content": "ru_RU",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
200
  "single_word": false,
201
  "special": true
202
  },
203
+ "250022": {
204
+ "content": "si_LK",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
208
  "single_word": false,
209
  "special": true
210
  },
211
+ "250023": {
212
+ "content": "tr_TR",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
216
  "single_word": false,
217
  "special": true
218
  },
219
+ "250024": {
220
+ "content": "vi_VN",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
224
  "single_word": false,
225
  "special": true
226
  },
227
+ "250025": {
228
+ "content": "zh_CN",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
232
  "single_word": false,
233
  "special": true
234
  },
235
+ "250026": {
236
+ "content": "af_ZA",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
240
  "single_word": false,
241
  "special": true
242
  },
243
+ "250027": {
244
+ "content": "az_AZ",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
248
  "single_word": false,
249
  "special": true
250
  },
251
+ "250028": {
252
+ "content": "bn_IN",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
256
  "single_word": false,
257
  "special": true
258
  },
259
+ "250029": {
260
+ "content": "fa_IR",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
264
  "single_word": false,
265
  "special": true
266
  },
267
+ "250030": {
268
+ "content": "he_IL",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
272
  "single_word": false,
273
  "special": true
274
  },
275
+ "250031": {
276
+ "content": "hr_HR",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
280
  "single_word": false,
281
  "special": true
282
  },
283
+ "250032": {
284
+ "content": "id_ID",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
288
  "single_word": false,
289
  "special": true
290
  },
291
+ "250033": {
292
+ "content": "ka_GE",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
296
  "single_word": false,
297
  "special": true
298
  },
299
+ "250034": {
300
+ "content": "km_KH",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
304
  "single_word": false,
305
  "special": true
306
  },
307
+ "250035": {
308
+ "content": "mk_MK",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
312
  "single_word": false,
313
  "special": true
314
  },
315
+ "250036": {
316
+ "content": "ml_IN",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
320
  "single_word": false,
321
  "special": true
322
  },
323
+ "250037": {
324
+ "content": "mn_MN",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
328
  "single_word": false,
329
  "special": true
330
  },
331
+ "250038": {
332
+ "content": "mr_IN",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
336
  "single_word": false,
337
  "special": true
338
  },
339
+ "250039": {
340
+ "content": "pl_PL",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
344
  "single_word": false,
345
  "special": true
346
  },
347
+ "250040": {
348
+ "content": "ps_AF",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
352
  "single_word": false,
353
  "special": true
354
  },
355
+ "250041": {
356
+ "content": "pt_XX",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
360
  "single_word": false,
361
  "special": true
362
  },
363
+ "250042": {
364
+ "content": "sv_SE",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
368
  "single_word": false,
369
  "special": true
370
  },
371
+ "250043": {
372
+ "content": "sw_KE",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
376
  "single_word": false,
377
  "special": true
378
  },
379
+ "250044": {
380
+ "content": "ta_IN",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
384
  "single_word": false,
385
  "special": true
386
  },
387
+ "250045": {
388
+ "content": "te_IN",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
392
  "single_word": false,
393
  "special": true
394
  },
395
+ "250046": {
396
+ "content": "th_TH",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
400
  "single_word": false,
401
  "special": true
402
  },
403
+ "250047": {
404
+ "content": "tl_XX",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
408
  "single_word": false,
409
  "special": true
410
  },
411
+ "250048": {
412
+ "content": "uk_UA",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
416
  "single_word": false,
417
  "special": true
418
  },
419
+ "250049": {
420
+ "content": "ur_PK",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
424
  "single_word": false,
425
  "special": true
426
  },
427
+ "250050": {
428
+ "content": "xh_ZA",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
432
  "single_word": false,
433
  "special": true
434
  },
435
+ "250051": {
436
+ "content": "gl_ES",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
440
  "single_word": false,
441
  "special": true
442
  },
443
+ "250052": {
444
+ "content": "sl_SI",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
448
  "single_word": false,
449
  "special": true
450
  },
451
+ "250053": {
452
+ "content": "<mask>",
453
+ "lstrip": true,
454
+ "normalized": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
458
  }
459
  },
460
  "additional_special_tokens": [
461
+ "ar_AR",
462
+ "cs_CZ",
463
+ "de_DE",
464
+ "en_XX",
465
+ "es_XX",
466
+ "et_EE",
467
+ "fi_FI",
468
+ "fr_XX",
469
+ "gu_IN",
470
+ "hi_IN",
471
+ "it_IT",
472
+ "ja_XX",
473
+ "kk_KZ",
474
+ "ko_KR",
475
+ "lt_LT",
476
+ "lv_LV",
477
+ "my_MM",
478
+ "ne_NP",
479
+ "nl_XX",
480
+ "ro_RO",
481
+ "ru_RU",
482
+ "si_LK",
483
+ "tr_TR",
484
+ "vi_VN",
485
+ "zh_CN",
486
+ "af_ZA",
487
+ "az_AZ",
488
+ "bn_IN",
489
+ "fa_IR",
490
+ "he_IL",
491
+ "hr_HR",
492
+ "id_ID",
493
+ "ka_GE",
494
+ "km_KH",
495
+ "mk_MK",
496
+ "ml_IN",
497
+ "mn_MN",
498
+ "mr_IN",
499
+ "pl_PL",
500
+ "ps_AF",
501
+ "pt_XX",
502
+ "sv_SE",
503
+ "sw_KE",
504
+ "ta_IN",
505
+ "te_IN",
506
+ "th_TH",
507
+ "tl_XX",
508
+ "uk_UA",
509
+ "ur_PK",
510
+ "xh_ZA",
511
+ "gl_ES",
512
+ "sl_SI"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  ],
514
  "bos_token": "<s>",
515
  "clean_up_tokenization_spaces": true,
516
+ "cls_token": "<s>",
517
  "eos_token": "</s>",
518
+ "language_codes": "ML50",
519
+ "mask_token": "<mask>",
520
+ "model_max_length": 1000000000000000019884624838656,
521
  "pad_token": "<pad>",
522
  "sep_token": "</s>",
523
  "sp_model_kwargs": {},
524
+ "src_lang": "en_XX",
525
  "tgt_lang": null,
526
+ "tokenizer_class": "MBart50Tokenizer",
527
  "unk_token": "<unk>"
528
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d98351f61bf410cac16026753fa9647aa9fd26ca17ab7a150a0294a2e7053394
3
  size 4667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d3d2bf4279686ec39bc30d2e7b449cb8a7feeec0786fdc3c48f3df11bfbecf
3
  size 4667