nroggendorff committed on
Commit
861cd57
·
verified ·
1 Parent(s): e485f7a

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +2 -1
train.py CHANGED
@@ -79,7 +79,7 @@ def format_prompts(examples, tokenizer, isinst):
79
 
80
  def create_model(tokenizer):
81
  config = LlamaConfig(
82
- vocab_size=tokenizer.vocab_size + 10,
83
  hidden_size=FACTOR,
84
  intermediate_size=FACTOR * 4,
85
  num_hidden_layers=12,
@@ -185,6 +185,7 @@ def main(push_to_hub=True, is_inst_finetune=False):
185
  model.resize_token_embeddings(len(tokenizer))
186
  else:
187
  model = create_model(tokenizer) if INIT == 0 else load_model()
 
188
 
189
  train_model(model, tokenizer, dataset, push_to_hub, is_inst_finetune)
190
 
 
79
 
80
  def create_model(tokenizer):
81
  config = LlamaConfig(
82
+ vocab_size=tokenizer.vocab_size,
83
  hidden_size=FACTOR,
84
  intermediate_size=FACTOR * 4,
85
  num_hidden_layers=12,
 
185
  model.resize_token_embeddings(len(tokenizer))
186
  else:
187
  model = create_model(tokenizer) if INIT == 0 else load_model()
188
+ model.resize_token_embeddings(len(tokenizer))
189
 
190
  train_model(model, tokenizer, dataset, push_to_hub, is_inst_finetune)
191