Spaces:

nroggendorff
/

train-llama

Runtime error

nroggendorff committed on Oct 20, 2024

Commit

3f18689

verified ·

1 Parent(s): b40fcc8

Update train.py

Files changed (1) hide show

train.py CHANGED Viewed

@@ -11,24 +11,23 @@ from torch.utils.data import DataLoader
 from torch.cuda.amp import autocast, GradScaler
 from itertools import islice
-BATCH_SIZE = 8
-EPOCHS = 3
-LEARNING_RATE = 1e-4
 FACTOR = 12 ** 3 // 3
-MAX_SEQ_LENGTH = 512
 VOCAB_SIZE = 32000
 INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
 INSTRUCT_DATASET = "nroggendorff/elephant"
 OUTPUT_REPO = "nroggendorff/smallama"
 INSTRUCT_FINETUNE_BOOL = False
-INIT = 3#/30
-SHARD_SIZE = int(5e+5)
 FP16 = True
-WARMUP_STEPS = 1000
-WEIGHT_DECAY = 0.01
 GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // 4
 PUSH_TO_HUB = True
-NUM_WORKERS = 4
 def load_data():
     if not INSTRUCT_FINETUNE_BOOL:

 from torch.cuda.amp import autocast, GradScaler
 from itertools import islice
+BATCH_SIZE = 16
+EPOCHS = 1
+LEARNING_RATE = 2e-4
 FACTOR = 12 ** 3 // 3
+MAX_SEQ_LENGTH = 128
 VOCAB_SIZE = 32000
 INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
 INSTRUCT_DATASET = "nroggendorff/elephant"
 OUTPUT_REPO = "nroggendorff/smallama"
 INSTRUCT_FINETUNE_BOOL = False
+INIT = 0
+SHARD_SIZE = int(15e+5)
 FP16 = True
+WARMUP_STEPS = 200
+WEIGHT_DECAY = 1e-5
 GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // 4
 PUSH_TO_HUB = True
 def load_data():
     if not INSTRUCT_FINETUNE_BOOL: