nroggendorff committed on
Commit
d4793df
·
verified ·
1 Parent(s): 3304e16

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +7 -6
train.py CHANGED
@@ -106,12 +106,13 @@ def configure_tokenizer(tokenizer):
106
  "additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
107
  }
108
  tokenizer.add_special_tokens(special_tokens)
 
 
 
 
109
 
110
- tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
111
- tokenizer.assistant_token_id = tokenizer.convert_tokens_to_ids("<|bot|>")
112
-
113
- chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
114
- tokenizer.chat_template = chat_template
115
 
116
  def train_model(model, tokenizer, dataset, push, isinst):
117
  args = TrainingArguments(
@@ -167,8 +168,8 @@ def main(push_to_hub=True, is_inst_finetune):
167
  instruct = dataset['instruct']
168
  training_corpus = get_training_corpus(dataset)
169
  tokenizer = create_tokenizer(training_corpus)
 
170
  if is_inst_finetune:
171
- configure_tokenizer(tokenizer)
172
  model = load_model()
173
  train_model(model, tokenizer, instruct, push_to_hub, True)
174
  else:
 
106
  "additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
107
  }
108
  tokenizer.add_special_tokens(special_tokens)
109
+
110
+ if INSTRUCT_FINETUNE_BOOL:
111
+ tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
112
+ tokenizer.assistant_token_id = tokenizer.convert_tokens_to_ids("<|bot|>")
113
 
114
+ chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
115
+ tokenizer.chat_template = chat_template
 
 
 
116
 
117
  def train_model(model, tokenizer, dataset, push, isinst):
118
  args = TrainingArguments(
 
168
  instruct = dataset['instruct']
169
  training_corpus = get_training_corpus(dataset)
170
  tokenizer = create_tokenizer(training_corpus)
171
+ configure_tokenizer(tokenizer)
172
  if is_inst_finetune:
 
173
  model = load_model()
174
  train_model(model, tokenizer, instruct, push_to_hub, True)
175
  else: