nroggendorff committed on
Commit
dcc51de
·
verified ·
1 Parent(s): d94939a

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +2 -2
train.py CHANGED
@@ -54,7 +54,7 @@ def create_tokenizer(training_corpus):
54
  return fast_tokenizer
55
 
56
  def load_tokenizer():
57
- return AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")#OUTPUT_REPO)
58
 
59
  def get_training_corpus(dataset):
60
  for i in range(0, len(dataset['text']), 1000):
@@ -176,7 +176,7 @@ def train_model(model, tokenizer, dataset, push, isinst):
176
 
177
  def main(push_to_hub=True, is_inst_finetune=False):
178
  dataset = load_data()
179
- if not is_inst_finetune and INIT == 0 and False:
180
  training_corpus = get_training_corpus(dataset)
181
  tokenizer = create_tokenizer(training_corpus)
182
  else:
 
54
  return fast_tokenizer
55
 
56
  def load_tokenizer():
57
+ return AutoTokenizer.from_pretrained(OUTPUT_REPO)
58
 
59
  def get_training_corpus(dataset):
60
  for i in range(0, len(dataset['text']), 1000):
 
176
 
177
  def main(push_to_hub=True, is_inst_finetune=False):
178
  dataset = load_data()
179
+ if not is_inst_finetune and INIT == 0:
180
  training_corpus = get_training_corpus(dataset)
181
  tokenizer = create_tokenizer(training_corpus)
182
  else: