Spaces:
Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -106,12 +106,13 @@ def configure_tokenizer(tokenizer):
|
|
106 |
"additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
|
107 |
}
|
108 |
tokenizer.add_special_tokens(special_tokens)
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
|
114 |
-
tokenizer.chat_template = chat_template
|
115 |
|
116 |
def train_model(model, tokenizer, dataset, push, isinst):
|
117 |
args = TrainingArguments(
|
@@ -167,8 +168,8 @@ def main(push_to_hub=True, is_inst_finetune):
|
|
167 |
instruct = dataset['instruct']
|
168 |
training_corpus = get_training_corpus(dataset)
|
169 |
tokenizer = create_tokenizer(training_corpus)
|
|
|
170 |
if is_inst_finetune:
|
171 |
-
configure_tokenizer(tokenizer)
|
172 |
model = load_model()
|
173 |
train_model(model, tokenizer, instruct, push_to_hub, True)
|
174 |
else:
|
|
|
106 |
"additional_special_tokens": ["<|user|>", "<|bot|>", "<|end|>"]
|
107 |
}
|
108 |
tokenizer.add_special_tokens(special_tokens)
|
109 |
+
|
110 |
+
if INSTRUCT_FINETUNE_BOOL:
|
111 |
+
tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
|
112 |
+
tokenizer.assistant_token_id = tokenizer.convert_tokens_to_ids("<|bot|>")
|
113 |
|
114 |
+
chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|bot|>\n' + message['content'] + '<|end|>\n' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
|
115 |
+
tokenizer.chat_template = chat_template
|
|
|
|
|
|
|
116 |
|
117 |
def train_model(model, tokenizer, dataset, push, isinst):
|
118 |
args = TrainingArguments(
|
|
|
168 |
instruct = dataset['instruct']
|
169 |
training_corpus = get_training_corpus(dataset)
|
170 |
tokenizer = create_tokenizer(training_corpus)
|
171 |
+
configure_tokenizer(tokenizer)
|
172 |
if is_inst_finetune:
|
|
|
173 |
model = load_model()
|
174 |
train_model(model, tokenizer, instruct, push_to_hub, True)
|
175 |
else:
|