dongim04 committed on
Commit
2b32ebf
·
verified ·
1 Parent(s): 7de9d70

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 575,
3
+ "<s>": 574,
4
+ "[PAD]": 573,
5
+ "[UNK]": 572
6
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "572": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "573": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "574": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "575": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "additional_special_tokens": [],
37
+ "bos_token": "<s>",
38
+ "clean_up_tokenization_spaces": true,
39
+ "do_lower_case": false,
40
+ "eos_token": "</s>",
41
+ "model_max_length": 1000000000000000019884624838656,
42
+ "pad_token": "[PAD]",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "tokenizer_file": null,
47
+ "unk_token": "[UNK]",
48
+ "word_delimiter_token": "|"
49
+ }
vocab.json ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 573,
3
+ "[UNK]": 572,
4
+ "|": 571,
5
+ "一": 8,
6
+ "七": 525,
7
+ "三": 452,
8
+ "上": 328,
9
+ "下": 99,
10
+ "不": 403,
11
+ "与": 273,
12
+ "专": 566,
13
+ "且": 496,
14
+ "业": 405,
15
+ "东": 470,
16
+ "两": 213,
17
+ "严": 129,
18
+ "个": 153,
19
+ "中": 418,
20
+ "为": 198,
21
+ "主": 527,
22
+ "久": 265,
23
+ "么": 334,
24
+ "之": 4,
25
+ "乌": 385,
26
+ "也": 3,
27
+ "习": 381,
28
+ "买": 408,
29
+ "了": 538,
30
+ "事": 422,
31
+ "二": 46,
32
+ "于": 82,
33
+ "互": 393,
34
+ "五": 175,
35
+ "些": 216,
36
+ "交": 86,
37
+ "享": 197,
38
+ "亲": 228,
39
+ "人": 75,
40
+ "什": 252,
41
+ "今": 300,
42
+ "介": 523,
43
+ "从": 203,
44
+ "他": 516,
45
+ "以": 493,
46
+ "们": 358,
47
+ "件": 439,
48
+ "任": 285,
49
+ "份": 299,
50
+ "仿": 532,
51
+ "优": 84,
52
+ "会": 563,
53
+ "伴": 32,
54
+ "但": 136,
55
+ "位": 67,
56
+ "住": 1,
57
+ "体": 211,
58
+ "余": 491,
59
+ "佛": 414,
60
+ "作": 297,
61
+ "你": 364,
62
+ "使": 536,
63
+ "侵": 40,
64
+ "便": 332,
65
+ "信": 54,
66
+ "修": 185,
67
+ "候": 80,
68
+ "倾": 365,
69
+ "偏": 95,
70
+ "做": 126,
71
+ "停": 19,
72
+ "偶": 188,
73
+ "傅": 542,
74
+ "催": 495,
75
+ "像": 465,
76
+ "儿": 276,
77
+ "充": 284,
78
+ "先": 488,
79
+ "光": 21,
80
+ "克": 456,
81
+ "免": 62,
82
+ "全": 14,
83
+ "八": 294,
84
+ "六": 503,
85
+ "兰": 363,
86
+ "关": 111,
87
+ "兴": 460,
88
+ "其": 537,
89
+ "内": 155,
90
+ "再": 505,
91
+ "写": 565,
92
+ "冲": 370,
93
+ "决": 215,
94
+ "况": 359,
95
+ "准": 248,
96
+ "减": 250,
97
+ "几": 100,
98
+ "出": 6,
99
+ "分": 26,
100
+ "刚": 165,
101
+ "初": 303,
102
+ "别": 30,
103
+ "到": 73,
104
+ "刻": 411,
105
+ "前": 355,
106
+ "剑": 45,
107
+ "剧": 59,
108
+ "力": 56,
109
+ "功": 57,
110
+ "务": 489,
111
+ "动": 498,
112
+ "助": 553,
113
+ "努": 401,
114
+ "劳": 212,
115
+ "勤": 451,
116
+ "北": 454,
117
+ "区": 77,
118
+ "十": 446,
119
+ "半": 569,
120
+ "华": 88,
121
+ "单": 543,
122
+ "卡": 526,
123
+ "印": 37,
124
+ "历": 44,
125
+ "原": 50,
126
+ "去": 293,
127
+ "又": 275,
128
+ "友": 340,
129
+ "反": 169,
130
+ "发": 76,
131
+ "取": 74,
132
+ "受": 417,
133
+ "变": 444,
134
+ "口": 125,
135
+ "另": 245,
136
+ "只": 177,
137
+ "叫": 121,
138
+ "可": 249,
139
+ "右": 258,
140
+ "号": 66,
141
+ "吃": 267,
142
+ "各": 519,
143
+ "合": 161,
144
+ "同": 235,
145
+ "名": 98,
146
+ "后": 362,
147
+ "向": 253,
148
+ "吗": 567,
149
+ "吧": 520,
150
+ "听": 227,
151
+ "吼": 424,
152
+ "呀": 134,
153
+ "呃": 389,
154
+ "呗": 420,
155
+ "呢": 329,
156
+ "周": 421,
157
+ "和": 376,
158
+ "咱": 130,
159
+ "哈": 94,
160
+ "响": 530,
161
+ "哎": 413,
162
+ "哦": 291,
163
+ "哪": 459,
164
+ "啊": 323,
165
+ "啥": 71,
166
+ "喜": 429,
167
+ "嗯": 333,
168
+ "嘛": 361,
169
+ "嘴": 176,
170
+ "器": 236,
171
+ "四": 115,
172
+ "回": 52,
173
+ "因": 395,
174
+ "园": 38,
175
+ "围": 191,
176
+ "国": 109,
177
+ "在": 123,
178
+ "地": 180,
179
+ "城": 367,
180
+ "基": 425,
181
+ "壳": 70,
182
+ "备": 147,
183
+ "复": 189,
184
+ "外": 146,
185
+ "多": 221,
186
+ "够": 31,
187
+ "大": 304,
188
+ "天": 482,
189
+ "太": 322,
190
+ "头": 302,
191
+ "奇": 549,
192
+ "奋": 200,
193
+ "女": 379,
194
+ "她": 112,
195
+ "好": 13,
196
+ "如": 63,
197
+ "妈": 58,
198
+ "始": 41,
199
+ "姓": 568,
200
+ "威": 410,
201
+ "子": 350,
202
+ "字": 357,
203
+ "存": 287,
204
+ "学": 448,
205
+ "孩": 164,
206
+ "它": 48,
207
+ "安": 83,
208
+ "完": 368,
209
+ "定": 348,
210
+ "宝": 201,
211
+ "实": 69,
212
+ "家": 330,
213
+ "容": 308,
214
+ "宽": 139,
215
+ "寡": 316,
216
+ "对": 383,
217
+ "射": 529,
218
+ "小": 453,
219
+ "少": 546,
220
+ "尔": 301,
221
+ "就": 399,
222
+ "尽": 51,
223
+ "局": 23,
224
+ "居": 5,
225
+ "屏": 135,
226
+ "展": 375,
227
+ "属": 18,
228
+ "山": 548,
229
+ "岁": 396,
230
+ "州": 254,
231
+ "工": 515,
232
+ "左": 507,
233
+ "差": 390,
234
+ "己": 458,
235
+ "已": 437,
236
+ "巴": 335,
237
+ "市": 309,
238
+ "师": 163,
239
+ "希": 205,
240
+ "带": 33,
241
+ "帮": 502,
242
+ "常": 277,
243
+ "幕": 555,
244
+ "干": 551,
245
+ "平": 509,
246
+ "年": 208,
247
+ "并": 382,
248
+ "广": 263,
249
+ "应": 138,
250
+ "底": 234,
251
+ "度": 72,
252
+ "建": 110,
253
+ "开": 320,
254
+ "弄": 28,
255
+ "强": 347,
256
+ "归": 120,
257
+ "当": 292,
258
+ "彩": 143,
259
+ "影": 25,
260
+ "往": 478,
261
+ "很": 518,
262
+ "得": 116,
263
+ "微": 29,
264
+ "心": 461,
265
+ "忘": 419,
266
+ "忧": 39,
267
+ "快": 522,
268
+ "念": 471,
269
+ "怀": 101,
270
+ "怎": 96,
271
+ "怕": 402,
272
+ "思": 223,
273
+ "总": 504,
274
+ "悉": 279,
275
+ "悠": 20,
276
+ "患": 102,
277
+ "悲": 182,
278
+ "情": 386,
279
+ "惨": 469,
280
+ "惯": 93,
281
+ "想": 468,
282
+ "意": 281,
283
+ "感": 264,
284
+ "慢": 27,
285
+ "懂": 183,
286
+ "懒": 511,
287
+ "成": 194,
288
+ "我": 9,
289
+ "或": 173,
290
+ "戚": 256,
291
+ "房": 218,
292
+ "所": 157,
293
+ "手": 346,
294
+ "才": 450,
295
+ "打": 339,
296
+ "执": 233,
297
+ "找": 345,
298
+ "把": 78,
299
+ "抓": 487,
300
+ "抢": 206,
301
+ "抱": 192,
302
+ "担": 371,
303
+ "拍": 36,
304
+ "拿": 158,
305
+ "挂": 231,
306
+ "按": 464,
307
+ "挺": 47,
308
+ "换": 247,
309
+ "探": 217,
310
+ "接": 490,
311
+ "提": 262,
312
+ "搜": 240,
313
+ "搞": 195,
314
+ "摆": 442,
315
+ "放": 409,
316
+ "故": 167,
317
+ "教": 534,
318
+ "敬": 159,
319
+ "数": 261,
320
+ "整": 260,
321
+ "文": 556,
322
+ "斗": 486,
323
+ "断": 473,
324
+ "新": 171,
325
+ "方": 295,
326
+ "无": 314,
327
+ "旧": 204,
328
+ "时": 558,
329
+ "明": 106,
330
+ "星": 392,
331
+ "映": 378,
332
+ "是": 319,
333
+ "显": 426,
334
+ "暂": 298,
335
+ "最": 168,
336
+ "月": 118,
337
+ "有": 22,
338
+ "朋": 476,
339
+ "服": 440,
340
+ "望": 352,
341
+ "期": 559,
342
+ "本": 406,
343
+ "机": 90,
344
+ "杀": 312,
345
+ "来": 81,
346
+ "林": 391,
347
+ "果": 64,
348
+ "柔": 15,
349
+ "标": 141,
350
+ "校": 65,
351
+ "样": 539,
352
+ "棋": 564,
353
+ "楼": 91,
354
+ "概": 268,
355
+ "模": 477,
356
+ "次": 199,
357
+ "欢": 443,
358
+ "正": 353,
359
+ "武": 541,
360
+ "死": 377,
361
+ "殖": 506,
362
+ "比": 500,
363
+ "毕": 89,
364
+ "毛": 61,
365
+ "民": 156,
366
+ "水": 547,
367
+ "汇": 466,
368
+ "江": 455,
369
+ "沉": 128,
370
+ "没": 462,
371
+ "沿": 497,
372
+ "法": 117,
373
+ "注": 438,
374
+ "津": 255,
375
+ "活": 246,
376
+ "派": 313,
377
+ "流": 210,
378
+ "浙": 514,
379
+ "海": 430,
380
+ "消": 480,
381
+ "深": 407,
382
+ "温": 142,
383
+ "激": 224,
384
+ "灰": 545,
385
+ "点": 105,
386
+ "烦": 360,
387
+ "焦": 533,
388
+ "然": 131,
389
+ "熔": 225,
390
+ "熟": 343,
391
+ "爬": 103,
392
+ "爱": 531,
393
+ "爸": 43,
394
+ "物": 467,
395
+ "特": 35,
396
+ "狼": 324,
397
+ "环": 193,
398
+ "现": 479,
399
+ "理": 60,
400
+ "瓣": 317,
401
+ "甘": 540,
402
+ "生": 463,
403
+ "用": 272,
404
+ "电": 342,
405
+ "男": 114,
406
+ "画": 149,
407
+ "留": 184,
408
+ "略": 10,
409
+ "病": 412,
410
+ "痕": 172,
411
+ "白": 282,
412
+ "的": 16,
413
+ "盖": 436,
414
+ "目": 239,
415
+ "直": 561,
416
+ "相": 87,
417
+ "看": 2,
418
+ "真": 290,
419
+ "着": 179,
420
+ "研": 97,
421
+ "确": 55,
422
+ "神": 305,
423
+ "离": 474,
424
+ "种": 251,
425
+ "科": 447,
426
+ "秒": 92,
427
+ "称": 481,
428
+ "程": 315,
429
+ "稍": 220,
430
+ "突": 79,
431
+ "竟": 278,
432
+ "童": 554,
433
+ "等": 331,
434
+ "筑": 354,
435
+ "答": 113,
436
+ "策": 0,
437
+ "简": 499,
438
+ "类": 552,
439
+ "精": 34,
440
+ "累": 7,
441
+ "繁": 144,
442
+ "级": 219,
443
+ "线": 307,
444
+ "练": 286,
445
+ "组": 337,
446
+ "细": 380,
447
+ "终": 494,
448
+ "绍": 108,
449
+ "经": 544,
450
+ "结": 356,
451
+ "绕": 232,
452
+ "给": 445,
453
+ "续": 321,
454
+ "综": 351,
455
+ "老": 243,
456
+ "考": 311,
457
+ "者": 501,
458
+ "而": 349,
459
+ "聊": 160,
460
+ "肃": 560,
461
+ "肯": 207,
462
+ "背": 17,
463
+ "能": 557,
464
+ "脑": 274,
465
+ "自": 432,
466
+ "致": 394,
467
+ "舍": 140,
468
+ "般": 209,
469
+ "节": 570,
470
+ "苦": 288,
471
+ "虑": 49,
472
+ "虽": 202,
473
+ "行": 270,
474
+ "被": 271,
475
+ "装": 427,
476
+ "西": 151,
477
+ "要": 124,
478
+ "覆": 289,
479
+ "见": 327,
480
+ "视": 152,
481
+ "觉": 187,
482
+ "角": 230,
483
+ "解": 269,
484
+ "言": 510,
485
+ "认": 513,
486
+ "让": 485,
487
+ "讯": 344,
488
+ "记": 12,
489
+ "讲": 222,
490
+ "许": 127,
491
+ "论": 42,
492
+ "设": 433,
493
+ "评": 387,
494
+ "识": 384,
495
+ "词": 181,
496
+ "试": 415,
497
+ "话": 174,
498
+ "该": 404,
499
+ "详": 435,
500
+ "语": 310,
501
+ "说": 85,
502
+ "读": 24,
503
+ "课": 517,
504
+ "调": 397,
505
+ "谢": 11,
506
+ "谱": 562,
507
+ "豆": 366,
508
+ "象": 326,
509
+ "质": 508,
510
+ "资": 107,
511
+ "走": 104,
512
+ "起": 374,
513
+ "超": 306,
514
+ "趣": 166,
515
+ "足": 280,
516
+ "跟": 512,
517
+ "路": 398,
518
+ "身": 521,
519
+ "转": 133,
520
+ "软": 122,
521
+ "轻": 53,
522
+ "较": 457,
523
+ "输": 431,
524
+ "辖": 68,
525
+ "辛": 434,
526
+ "辩": 137,
527
+ "边": 428,
528
+ "过": 226,
529
+ "近": 259,
530
+ "还": 132,
531
+ "这": 483,
532
+ "远": 244,
533
+ "迹": 484,
534
+ "送": 535,
535
+ "逃": 178,
536
+ "速": 524,
537
+ "遇": 237,
538
+ "避": 229,
539
+ "那": 145,
540
+ "邵": 196,
541
+ "郝": 528,
542
+ "部": 372,
543
+ "都": 416,
544
+ "释": 472,
545
+ "里": 241,
546
+ "重": 341,
547
+ "量": 242,
548
+ "钟": 373,
549
+ "铁": 119,
550
+ "长": 475,
551
+ "门": 283,
552
+ "问": 257,
553
+ "闲": 162,
554
+ "间": 423,
555
+ "院": 150,
556
+ "随": 186,
557
+ "隐": 325,
558
+ "隔": 214,
559
+ "难": 296,
560
+ "需": 441,
561
+ "霸": 318,
562
+ "靠": 148,
563
+ "面": 170,
564
+ "音": 388,
565
+ "顾": 266,
566
+ "题": 369,
567
+ "风": 400,
568
+ "飞": 449,
569
+ "驰": 190,
570
+ "高": 338,
571
+ "魔": 336,
572
+ "麻": 238,
573
+ "默": 550,
574
+ "龄": 154,
575
+ "龙": 492
576
+ }