Spaces: Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -23,7 +23,7 @@ INSTRUCT_DATASET = "nroggendorff/elephant"
|
|
23 |
OUTPUT_REPO = "nroggendorff/smallama"
|
24 |
INSTRUCT_FINETUNE_BOOL = True
|
25 |
INIT = 0
|
26 |
-
SHARD_SIZE = int(
|
27 |
FP16 = True
|
28 |
WARMUP_STEPS = 0
|
29 |
WEIGHT_DECAY = 0.
|
@@ -43,7 +43,9 @@ def load_data():
|
|
43 |
start = INIT * SHARD_SIZE
|
44 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
45 |
else:
|
46 |
-
dataset = load_dataset(INSTRUCT_DATASET, split="train")
|
|
|
|
|
47 |
return dataset
|
48 |
|
49 |
def create_tokenizer(training_corpus):
|
|
|
23 |
OUTPUT_REPO = "nroggendorff/smallama"
|
24 |
INSTRUCT_FINETUNE_BOOL = True
|
25 |
INIT = 0
|
26 |
+
SHARD_SIZE = int(15e+5)
|
27 |
FP16 = True
|
28 |
WARMUP_STEPS = 0
|
29 |
WEIGHT_DECAY = 0.
|
|
|
43 |
start = INIT * SHARD_SIZE
|
44 |
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
45 |
else:
|
46 |
+
dataset = load_dataset(INSTRUCT_DATASET, split="train", streaming=True)
|
47 |
+
start = INIT * SHARD_SIZE
|
48 |
+
dataset = Dataset.from_dict({'text': [example['text'] for example in islice(dataset, start, start + SHARD_SIZE)]})
|
49 |
return dataset
|
50 |
|
51 |
def create_tokenizer(training_corpus):
|