wangrongsheng committed on
Commit
a83c3fd
·
verified ·
1 Parent(s): 796ed17

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +35 -5
tokenizer_config.json CHANGED
@@ -5,7 +5,7 @@
5
  "0": {
6
  "content": "<unk>",
7
  "lstrip": false,
8
- "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
@@ -13,7 +13,7 @@
13
  "1": {
14
  "content": "<|startoftext|>",
15
  "lstrip": false,
16
- "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
@@ -21,22 +21,52 @@
21
  "2": {
22
  "content": "<|endoftext|>",
23
  "lstrip": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
  }
29
  },
 
 
 
 
 
30
  "bos_token": "<|startoftext|>",
 
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "<|endoftext|>",
33
  "legacy": true,
34
  "model_max_length": 4096,
35
  "pad_token": "<unk>",
36
- "padding_side": "left",
37
  "sp_model_kwargs": {},
38
- "split_special_tokens": false,
39
  "tokenizer_class": "LlamaTokenizer",
40
  "unk_token": "<unk>",
41
- "use_default_system_prompt": false
42
  }
 
5
  "0": {
6
  "content": "<unk>",
7
  "lstrip": false,
8
+ "normalized": true,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
 
13
  "1": {
14
  "content": "<|startoftext|>",
15
  "lstrip": false,
16
+ "normalized": true,
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
 
21
  "2": {
22
  "content": "<|endoftext|>",
23
  "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "6": {
30
+ "content": "<|im_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "7": {
38
+ "content": "<|im_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "8": {
46
+ "content": "<|im_sep|>",
47
+ "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  }
53
  },
54
+ "additional_special_tokens": [
55
+ "<|im_start|>",
56
+ "<|im_end|>",
57
+ "<|im_sep|>"
58
+ ],
59
  "bos_token": "<|startoftext|>",
60
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
61
  "clean_up_tokenization_spaces": false,
62
  "eos_token": "<|endoftext|>",
63
  "legacy": true,
64
  "model_max_length": 4096,
65
  "pad_token": "<unk>",
66
+ "padding_side": "right",
67
  "sp_model_kwargs": {},
68
+ "spaces_between_special_tokens": false,
69
  "tokenizer_class": "LlamaTokenizer",
70
  "unk_token": "<unk>",
71
+ "use_default_system_prompt": true
72
  }