obalcells committed on
Commit
68cc385
·
verified ·
1 Parent(s): 61ccc4f

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +7 -0
  2. tokenizer_config.json +5 -1
special_tokens_map.json CHANGED
@@ -12,5 +12,12 @@
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
 
 
 
 
 
 
 
15
  }
16
  }
 
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
  }
23
  }
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "128000": {
4
  "content": "<|begin_of_text|>",
@@ -2059,5 +2060,8 @@
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 131072,
2062
- "tokenizer_class": "PreTrainedTokenizer"
 
 
 
2063
  }
 
1
  {
2
+ "add_bos_token": true,
3
  "added_tokens_decoder": {
4
  "128000": {
5
  "content": "<|begin_of_text|>",
 
2060
  "attention_mask"
2061
  ],
2062
  "model_max_length": 131072,
2063
+ "pad_token": "<|finetune_right_pad_id|>",
2064
+ "padding_side": "left",
2065
+ "tokenizer_class": "PreTrainedTokenizer",
2066
+ "unk_token": null
2067
  }