lrana committed
Commit 719e1d3 · verified · 1 Parent(s): 591fcaa

Upload tokenizer

added_tokens.json CHANGED
@@ -1,4 +1,3 @@
 {
-  "<pad>": 32000,
-  "[PAD]": 32001
+  "<pad>": 32000
 }
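
The net change: the stray "[PAD]" added token (id 32001) is dropped, leaving "<pad>" (id 32000) as the only added-token entry. A minimal way to check the resulting vocabulary, sketched in Python with "user/model" standing in for this repository's id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/model")  # placeholder repo id

print(tok.convert_tokens_to_ids("<pad>"))  # 32000: still a known added token
print(tok.convert_tokens_to_ids("[PAD]"))  # unknown now, so it maps to the unk id
print(len(tok))                            # 32000 base vocab + 1 added token = 32001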
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "</s>",
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -43,15 +43,6 @@
       "rstrip": false,
       "normalized": true,
       "special": false
-    },
-    {
-      "id": 32001,
-      "content": "[PAD]",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": {
tokenizer_config.json CHANGED
@@ -33,14 +33,6 @@
       "rstrip": false,
       "single_word": false,
       "special": false
-    },
-    "32001": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
@@ -48,7 +40,7 @@
   "eos_token": "</s>",
   "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
+  "pad_token": "<unk>",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",