hyojin99 committed on
Commit
bc56161
·
verified ·
1 Parent(s): 8e7edb1

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +10 -0
  3. vocab.json +75 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "pad_token": "[PAD]",
6
+ "replace_word_delimiter_char": " ",
7
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
+ "unk_token": "[UNK]",
9
+ "word_delimiter_token": "|"
10
+ }
vocab.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": 31,
3
+ "1": 20,
4
+ "2": 3,
5
+ "3": 48,
6
+ "4": 42,
7
+ "8": 69,
8
+ "[PAD]": 72,
9
+ "[UNK]": 71,
10
+ "a": 23,
11
+ "c": 54,
12
+ "d": 28,
13
+ "e": 0,
14
+ "f": 25,
15
+ "i": 57,
16
+ "j": 8,
17
+ "m": 15,
18
+ "n": 33,
19
+ "o": 29,
20
+ "p": 26,
21
+ "r": 51,
22
+ "s": 62,
23
+ "t": 63,
24
+ "u": 64,
25
+ "v": 68,
26
+ "|": 59,
27
+ "ㄱ": 65,
28
+ "ㄲ": 67,
29
+ "ㄴ": 60,
30
+ "ㄵ": 7,
31
+ "ㄶ": 2,
32
+ "ㄷ": 6,
33
+ "ㄸ": 12,
34
+ "ㄹ": 27,
35
+ "ㄺ": 34,
36
+ "ㄻ": 22,
37
+ "ㄼ": 70,
38
+ "ㄾ": 13,
39
+ "ㅀ": 55,
40
+ "ㅁ": 45,
41
+ "ㅂ": 5,
42
+ "ㅃ": 41,
43
+ "ㅄ": 61,
44
+ "ㅅ": 18,
45
+ "ㅆ": 46,
46
+ "ㅇ": 17,
47
+ "ㅈ": 16,
48
+ "ㅉ": 40,
49
+ "ㅊ": 9,
50
+ "ㅋ": 19,
51
+ "ㅌ": 47,
52
+ "ㅍ": 1,
53
+ "ㅎ": 14,
54
+ "ㅏ": 66,
55
+ "ㅐ": 39,
56
+ "ㅑ": 37,
57
+ "ㅒ": 11,
58
+ "ㅓ": 38,
59
+ "ㅔ": 52,
60
+ "ㅕ": 43,
61
+ "ㅖ": 24,
62
+ "ㅗ": 58,
63
+ "ㅘ": 49,
64
+ "ㅙ": 35,
65
+ "ㅚ": 21,
66
+ "ㅛ": 53,
67
+ "ㅜ": 50,
68
+ "ㅝ": 30,
69
+ "ㅞ": 10,
70
+ "ㅟ": 44,
71
+ "ㅠ": 4,
72
+ "ㅡ": 56,
73
+ "ㅢ": 36,
74
+ "ㅣ": 32
75
+ }