Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -46,8 +46,9 @@ def create_tokenizer(training_corpus):
|
|
46 |
|
47 |
def get_training_corpus(dataset):
|
48 |
texts = []
|
49 |
-
for field in ['pretrain', 'instruct']:
|
50 |
-
texts.extend(dataset[field]['text'])
|
|
|
51 |
|
52 |
for i in range(0, len(texts), 1000):
|
53 |
yield texts[i : i + 1000]
|
|
|
46 |
|
47 |
def get_training_corpus(dataset):
|
48 |
texts = []
|
49 |
+
#for field in ['pretrain', 'instruct']:
|
50 |
+
# texts.extend(dataset[field]['text'])
|
51 |
+
texts.extend(dataset['text'])
|
52 |
|
53 |
for i in range(0, len(texts), 1000):
|
54 |
yield texts[i : i + 1000]
|