Spaces: Starting on L40S
Update train.py
Browse files
train.py
CHANGED
|
@@ -30,7 +30,7 @@ PUSH_TO_HUB = True
|
|
| 30 |
|
| 31 |
def load_data():
|
| 32 |
if not INSTRUCT_FINETUNE_BOOL:
|
| 33 |
-
dataset = load_dataset(INPUT_DATASET, "cosmopedia-v2", split="train", streaming=True)
|
| 34 |
start = INIT * SHARD_SIZE
|
| 35 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
| 36 |
else:
|
|
|
|
| 30 |
|
| 31 |
def load_data():
|
| 32 |
if not INSTRUCT_FINETUNE_BOOL:
|
| 33 |
+
dataset = load_dataset(INPUT_DATASET, "cosmopedia-v2", split="train", num_proc=BATCH_SIZE, streaming=True)
|
| 34 |
start = INIT * SHARD_SIZE
|
| 35 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
| 36 |
else:
|