ImNobody committed on
Commit
dcf7c72
·
1 Parent(s): db47cf8

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +0 -1
  2. vocab.json +40 -40
tokenizer_config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "bos_token": "<s>",
3
- "clean_up_tokenization_spaces": true,
4
  "do_lower_case": false,
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
 
1
  {
2
  "bos_token": "<s>",
 
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
  "model_max_length": 1000000000000000019884624838656,
vocab.json CHANGED
@@ -1,44 +1,44 @@
1
  {
2
- "'": 2,
3
- "(": 15,
4
- ")": 27,
5
- "/": 14,
6
  "[PAD]": 41,
7
  "[UNK]": 40,
8
- "a": 10,
9
- "b": 22,
10
- "c": 4,
11
- "d": 16,
12
- "e": 5,
13
- "f": 11,
14
- "g": 7,
15
- "h": 28,
16
- "i": 18,
17
- "j": 20,
18
- "k": 24,
19
- "l": 12,
20
- "m": 6,
21
- "n": 31,
22
- "o": 38,
23
- "p": 32,
24
- "q": 25,
25
- "r": 9,
26
- "s": 0,
27
- "t": 3,
28
- "u": 39,
29
- "v": 1,
30
- "w": 13,
31
- "x": 29,
32
- "y": 34,
33
- "z": 26,
34
- "|": 36,
35
- "ß": 19,
36
- "à": 37,
37
- "ä": 23,
38
- "é": 21,
39
- "ö": 33,
40
- "ü": 8,
41
- "–": 17,
42
- "’": 35,
43
- "„": 30
44
  }
 
1
  {
2
+ "'": 13,
3
+ "(": 14,
4
+ ")": 0,
5
+ "/": 10,
6
  "[PAD]": 41,
7
  "[UNK]": 40,
8
+ "a": 15,
9
+ "b": 4,
10
+ "c": 25,
11
+ "d": 19,
12
+ "e": 24,
13
+ "f": 30,
14
+ "g": 38,
15
+ "h": 36,
16
+ "i": 37,
17
+ "j": 3,
18
+ "k": 18,
19
+ "l": 34,
20
+ "m": 23,
21
+ "n": 21,
22
+ "o": 9,
23
+ "p": 31,
24
+ "q": 29,
25
+ "r": 20,
26
+ "s": 12,
27
+ "t": 11,
28
+ "u": 35,
29
+ "v": 26,
30
+ "w": 7,
31
+ "x": 27,
32
+ "y": 1,
33
+ "z": 33,
34
+ "|": 17,
35
+ "ß": 22,
36
+ "à": 39,
37
+ "ä": 8,
38
+ "é": 6,
39
+ "ö": 28,
40
+ "ü": 32,
41
+ "–": 16,
42
+ "’": 5,
43
+ "„": 2
44
  }