nroggendorff committed on
Commit
53f6ddc
·
verified ·
1 Parent(s): 05f6fa1

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +7 -7
train.py CHANGED
@@ -239,7 +239,11 @@ def main(push_to_hub=True, is_inst_finetune=False):
239
  dataset = load_data()
240
  print("Loaded data.")
241
 
242
- if not is_inst_finetune and INIT == 0:
 
 
 
 
243
  print("Making Corpus..")
244
  training_corpus = get_training_corpus(dataset)
245
  print("Made Corpus.")
@@ -247,10 +251,6 @@ def main(push_to_hub=True, is_inst_finetune=False):
247
  print("Making Tokenizer..")
248
  tokenizer = create_tokenizer(training_corpus)
249
  print(f"Made Tokenizer with size {len(tokenizer)}.")
250
- else:
251
- print("Loading Tokenizer..")
252
- tokenizer = load_tokenizer()
253
- print("Loaded Tokenizer.")
254
 
255
  # print("Adding Tokens..")
256
  # num_new_tokens = update_tokenizer(tokenizer, dataset)
@@ -261,13 +261,13 @@ def main(push_to_hub=True, is_inst_finetune=False):
261
  configure_tokenizer(tokenizer)
262
  print("Added Tokens.")
263
 
264
- if is_inst_finetune and INIT > 0:
265
  print("Loading Model..")
266
  model = load_model()
267
  print("Loaded Model.")
268
  else:
269
  print("Creating Model..")
270
- model = create_model(tokenizer) if INIT == 0 else load_model()
271
  print("Created Model.")
272
 
273
  print("Resizing Token Embeddings..")
 
239
  dataset = load_data()
240
  print("Loaded data.")
241
 
242
+ if is_inst_finetune and INIT > 0:
243
+ print("Loading Tokenizer..")
244
+ tokenizer = load_tokenizer()
245
+ print("Loaded Tokenizer.")
246
+ else:
247
  print("Making Corpus..")
248
  training_corpus = get_training_corpus(dataset)
249
  print("Made Corpus.")
 
251
  print("Making Tokenizer..")
252
  tokenizer = create_tokenizer(training_corpus)
253
  print(f"Made Tokenizer with size {len(tokenizer)}.")
 
 
 
 
254
 
255
  # print("Adding Tokens..")
256
  # num_new_tokens = update_tokenizer(tokenizer, dataset)
 
261
  configure_tokenizer(tokenizer)
262
  print("Added Tokens.")
263
 
264
+ if is_inst_finetune or INIT > 0:
265
  print("Loading Model..")
266
  model = load_model()
267
  print("Loaded Model.")
268
  else:
269
  print("Creating Model..")
270
+ model = create_model(tokenizer)
271
  print("Created Model.")
272
 
273
  print("Resizing Token Embeddings..")