nroggendorff committed on
Commit
ca7245e
·
verified ·
1 Parent(s): 5d4d177

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +2 -1
train.py CHANGED
@@ -16,7 +16,7 @@ VOCAB_SIZE = 32000
16
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
17
  INSTRUCT_DATASET = "nroggendorff/elephant"
18
  OUTPUT_REPO = "nroggendorff/smallama"
19
- INSTRUCT_FINETUNE_BOOL = False
20
  FP16 = True
21
  WARMUP_STEPS = 0
22
  DECAY = 0
@@ -179,6 +179,7 @@ def main(push_to_hub=True, is_inst_finetune=False):
179
  configure_tokenizer(tokenizer)
180
  if is_inst_finetune:
181
  model = load_model()
 
182
  train_model(model, tokenizer, dataset, push_to_hub, True)
183
  else:
184
  model = create_model(tokenizer)
 
16
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
17
  INSTRUCT_DATASET = "nroggendorff/elephant"
18
  OUTPUT_REPO = "nroggendorff/smallama"
19
+ INSTRUCT_FINETUNE_BOOL = True
20
  FP16 = True
21
  WARMUP_STEPS = 0
22
  DECAY = 0
 
179
  configure_tokenizer(tokenizer)
180
  if is_inst_finetune:
181
  model = load_model()
182
+ model.resize_token_embeddings(len(tokenizer))
183
  train_model(model, tokenizer, dataset, push_to_hub, True)
184
  else:
185
  model = create_model(tokenizer)