Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -30,7 +30,7 @@ PUSH_TO_HUB = True
|
|
30 |
|
31 |
def load_data():
|
32 |
if not INSTRUCT_FINETUNE_BOOL:
|
33 |
-
dataset = load_dataset(INPUT_DATASET, "cosmopedia-v2", split="train", streaming=True)
|
34 |
start = INIT * SHARD_SIZE
|
35 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
36 |
else:
|
|
|
30 |
|
31 |
def load_data():
|
32 |
if not INSTRUCT_FINETUNE_BOOL:
|
33 |
+
dataset = load_dataset(INPUT_DATASET, "cosmopedia-v2", split="train", num_proc=BATCH_SIZE, streaming=True)
|
34 |
start = INIT * SHARD_SIZE
|
35 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
36 |
else:
|