|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92538": { |
|
"content": "<|plugin|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92539": { |
|
"content": "<|interpreter|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92540": { |
|
"content": "<|action_end|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92541": { |
|
"content": "<|action_start|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92542": { |
|
"content": "<|im_end|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92543": { |
|
"content": "<|im_start|>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92544": { |
|
"content": "<img>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92545": { |
|
"content": "</img>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92546": { |
|
"content": "<IMG_CONTEXT>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92547": { |
|
"content": "<quad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92548": { |
|
"content": "</quad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92549": { |
|
"content": "<ref>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92550": { |
|
"content": "</ref>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92551": { |
|
"content": "<box>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"92552": { |
|
"content": "</box>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"<|im_start|>", |
|
"<|im_end|>", |
|
"<|action_start|>", |
|
"<|action_end|>", |
|
"<|interpreter|>", |
|
"<|plugin|>", |
|
"<img>", |
|
"</img>", |
|
"<IMG_CONTEXT>", |
|
"<quad>", |
|
"</quad>", |
|
"<ref>", |
|
"</ref>", |
|
"<box>", |
|
"</box>" |
|
], |
|
"auto_map": { |
|
"AutoTokenizer": [ |
|
"tokenization_internlm2.InternLM2Tokenizer", |
|
null |
|
] |
|
}, |
|
"bos_token": "<s>", |
|
"chat_template": "{{ bos_token }}{% for message in messages %}\n {%- if message['role'] == 'user' and template -%}\n {{- '<|im_start|>' + message['role'] -}}\n {{ '\n# Template:' }}\n {{- '\n' + template }}\n {% if examples %}\n {{- '# Examples:' }}\n {% for example in examples %}\n {{- '## Input:\n' }}\n {{- example['input'] + '\n' }}\n {{- '## Output:\n' }}\n {{- example['output'] | trim }}\n {% endfor %}\n {%- endif %}\n {{- '# Context:' }}\n {% if message['content'] is string %}\n {{- message['content'] | trim }}\n {% else %}\n {% for content in message['content'] %}\n {%- if content is string %}\n {{- content | trim }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] | trim }}\n {%- endif %}\n {% endfor %}\n {% endif %}\n {{- '<|im_end|> '}}\n {% else %}\n {{- '<|im_start|>' + message['role'] }}\n {% if message['content'] is string %}\n {{- message['content'] | trim }}\n {% else %}\n {% for content in message['content'] %}\n {%- if content is string %}\n {{- content | trim }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] | trim }}\n {%- endif %}\n {% endfor %}\n {% endif %}\n {{- '<|im_end|> '}}\n {% endif %}\n{% endfor -%}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant' }}\n{% endif -%}\n", |
|
"clean_up_tokenization_spaces": false, |
|
"eos_token": "</s>", |
|
"extra_special_tokens": {}, |
|
"model_max_length": 8192, |
|
"pad_token": "</s>", |
|
"tokenizer_class": "InternLM2Tokenizer", |
|
"unk_token": "<unk>" |
|
} |