nroggendorff committed on
Commit
6008f38
·
verified ·
1 Parent(s): ff3a9c3

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +3 -2
train.py CHANGED
@@ -46,8 +46,9 @@ def create_tokenizer(training_corpus):
46
 
47
  def get_training_corpus(dataset):
48
  texts = []
49
- for field in ['pretrain', 'instruct']:
50
- texts.extend(dataset[field]['text'])
 
51
 
52
  for i in range(0, len(texts), 1000):
53
  yield texts[i : i + 1000]
 
46
 
47
  def get_training_corpus(dataset):
48
  texts = []
49
+ #for field in ['pretrain', 'instruct']:
50
+ # texts.extend(dataset[field]['text'])
51
+ texts.extend(dataset['text'])
52
 
53
  for i in range(0, len(texts), 1000):
54
  yield texts[i : i + 1000]