hyojin99 committed on
Commit
7a9f78f
·
verified ·
1 Parent(s): dc0669a

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +2 -0
  2. vocab.json +71 -71
tokenizer_config.json CHANGED
@@ -2,7 +2,9 @@
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
 
5
  "pad_token": "[PAD]",
 
6
  "replace_word_delimiter_char": " ",
7
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
  "unk_token": "[UNK]",
 
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
+ "max_length": 512,
6
  "pad_token": "[PAD]",
7
+ "padding": "max_length",
8
  "replace_word_delimiter_char": " ",
9
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
10
  "unk_token": "[UNK]",
vocab.json CHANGED
@@ -1,75 +1,75 @@
1
  {
2
- "0": 17,
3
- "1": 62,
4
- "2": 50,
5
- "3": 6,
6
- "4": 9,
7
- "8": 66,
8
  "[PAD]": 72,
9
  "[UNK]": 71,
10
- "a": 40,
11
- "c": 41,
12
- "d": 44,
13
- "e": 60,
14
- "f": 58,
15
- "i": 5,
16
- "j": 10,
17
- "m": 30,
18
- "n": 51,
19
- "o": 35,
20
- "p": 14,
21
- "r": 27,
22
- "s": 53,
23
- "t": 13,
24
- "u": 45,
25
- "v": 26,
26
- "|": 19,
27
- "ㄱ": 56,
28
- "ㄲ": 28,
29
- "ㄴ": 38,
30
- "ㄵ": 18,
31
- "ㄶ": 63,
32
- "ㄷ": 1,
33
- "ㄸ": 15,
34
- "ㄹ": 68,
35
- "ㄺ": 11,
36
- "ㄻ": 22,
37
- "ㄼ": 12,
38
- "ㄾ": 59,
39
- "ㅀ": 7,
40
- "ㅁ": 2,
41
- "ㅂ": 64,
42
- "ㅃ": 46,
43
- "ㅄ": 8,
44
- "ㅅ": 20,
45
- "ㅆ": 23,
46
- "ㅇ": 16,
47
- "ㅈ": 34,
48
- "ㅉ": 24,
49
- "ㅊ": 29,
50
- "ㅋ": 31,
51
- "ㅌ": 54,
52
- "ㅍ": 0,
53
- "ㅎ": 57,
54
- "ㅏ": 55,
55
- "ㅐ": 69,
56
- "ㅑ": 42,
57
- "ㅒ": 61,
58
- "ㅓ": 49,
59
- "ㅔ": 65,
60
- "ㅕ": 21,
61
- "ㅖ": 3,
62
- "ㅗ": 48,
63
- "ㅘ": 47,
64
- "ㅙ": 37,
65
- "ㅚ": 32,
66
- "ㅛ": 70,
67
- "ㅜ": 52,
68
- "ㅝ": 67,
69
- "ㅞ": 39,
70
- "ㅟ": 33,
71
- "ㅠ": 25,
72
- "ㅡ": 43,
73
- "ㅢ": 4,
74
- "ㅣ": 36
75
  }
 
1
  {
2
+ "0": 32,
3
+ "1": 14,
4
+ "2": 26,
5
+ "3": 18,
6
+ "4": 31,
7
+ "8": 27,
8
  "[PAD]": 72,
9
  "[UNK]": 71,
10
+ "a": 25,
11
+ "c": 67,
12
+ "d": 6,
13
+ "e": 46,
14
+ "f": 36,
15
+ "i": 56,
16
+ "j": 41,
17
+ "m": 38,
18
+ "n": 3,
19
+ "o": 66,
20
+ "p": 1,
21
+ "r": 7,
22
+ "s": 70,
23
+ "t": 4,
24
+ "u": 13,
25
+ "v": 45,
26
+ "|": 34,
27
+ "ㄱ": 37,
28
+ "ㄲ": 12,
29
+ "ㄴ": 61,
30
+ "ㄵ": 53,
31
+ "ㄶ": 19,
32
+ "ㄷ": 42,
33
+ "ㄸ": 59,
34
+ "ㄹ": 40,
35
+ "ㄺ": 55,
36
+ "ㄻ": 24,
37
+ "ㄼ": 17,
38
+ "ㄾ": 21,
39
+ "ㅀ": 68,
40
+ "ㅁ": 23,
41
+ "ㅂ": 9,
42
+ "ㅃ": 39,
43
+ "ㅄ": 44,
44
+ "ㅅ": 30,
45
+ "ㅆ": 69,
46
+ "ㅇ": 65,
47
+ "ㅈ": 10,
48
+ "ㅉ": 11,
49
+ "ㅊ": 58,
50
+ "ㅋ": 8,
51
+ "ㅌ": 35,
52
+ "ㅍ": 48,
53
+ "ㅎ": 15,
54
+ "ㅏ": 52,
55
+ "ㅐ": 50,
56
+ "ㅑ": 49,
57
+ "ㅒ": 20,
58
+ "ㅓ": 57,
59
+ "ㅔ": 33,
60
+ "ㅕ": 28,
61
+ "ㅖ": 29,
62
+ "ㅗ": 22,
63
+ "ㅘ": 5,
64
+ "ㅙ": 64,
65
+ "ㅚ": 62,
66
+ "ㅛ": 2,
67
+ "ㅜ": 47,
68
+ "ㅝ": 51,
69
+ "ㅞ": 43,
70
+ "ㅟ": 16,
71
+ "ㅠ": 0,
72
+ "ㅡ": 54,
73
+ "ㅢ": 60,
74
+ "ㅣ": 63
75
  }