nroggendorff committed on
Commit
dfffe28
·
verified ·
1 Parent(s): 5ed2bdf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -73,24 +73,22 @@ def create_model(tokenizer, factor):
73
  return model
74
 
75
  def configure_tokenizer(tokenizer):
76
- tokenizer.bos_token = "<s>"
77
- tokenizer.eos_token = "</s>"
78
- tokenizer.unk_token = "<unk>"
79
- tokenizer.pad_token = "<pad>"
80
- tokenizer.mask_token = "<mask>"
81
-
82
- tokenizer.additional_special_tokens = ["<|user|>", "<|bot|>", "<|end|>"]
 
 
83
 
84
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
85
  tokenizer.assistant_token_id = tokenizer.convert_tokens_to_ids("<|bot|>")
86
 
87
- chat_template = "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{{ eos_token }}"
88
  tokenizer.chat_template = chat_template
89
 
90
- tokenizer.add_special_tokens({
91
- "additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
92
- })
93
-
94
  def train_model(model, tokenizer, dataset):
95
  args = TrainingArguments(
96
  output_dir="model",
 
73
  return model
74
 
75
  def configure_tokenizer(tokenizer):
76
+ special_tokens = {
77
+ "bos_token": "<s>",
78
+ "eos_token": "</s>",
79
+ "unk_token": "<unk>",
80
+ "pad_token": "<pad>",
81
+ "mask_token": "<mask>",
82
+ "additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
83
+ }
84
+ tokenizer.add_special_tokens(special_tokens)
85
 
86
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
87
  tokenizer.assistant_token_id = tokenizer.convert_tokens_to_ids("<|bot|>")
88
 
89
+ chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{{ eos_token }}"
90
  tokenizer.chat_template = chat_template
91
 
 
 
 
 
92
  def train_model(model, tokenizer, dataset):
93
  args = TrainingArguments(
94
  output_dir="model",