nroggendorff committed on
Commit
3f18689
·
verified ·
1 Parent(s): b40fcc8

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +8 -9
train.py CHANGED
@@ -11,24 +11,23 @@ from torch.utils.data import DataLoader
11
  from torch.cuda.amp import autocast, GradScaler
12
  from itertools import islice
13
 
14
- BATCH_SIZE = 8
15
- EPOCHS = 3
16
- LEARNING_RATE = 1e-4
17
  FACTOR = 12 ** 3 // 3
18
- MAX_SEQ_LENGTH = 512
19
  VOCAB_SIZE = 32000
20
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
21
  INSTRUCT_DATASET = "nroggendorff/elephant"
22
  OUTPUT_REPO = "nroggendorff/smallama"
23
  INSTRUCT_FINETUNE_BOOL = False
24
- INIT = 3#/30
25
- SHARD_SIZE = int(5e+5)
26
  FP16 = True
27
- WARMUP_STEPS = 1000
28
- WEIGHT_DECAY = 0.01
29
  GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // 4
30
  PUSH_TO_HUB = True
31
- NUM_WORKERS = 4
32
 
33
  def load_data():
34
  if not INSTRUCT_FINETUNE_BOOL:
 
11
  from torch.cuda.amp import autocast, GradScaler
12
  from itertools import islice
13
 
14
+ BATCH_SIZE = 16
15
+ EPOCHS = 1
16
+ LEARNING_RATE = 2e-4
17
  FACTOR = 12 ** 3 // 3
18
+ MAX_SEQ_LENGTH = 128
19
  VOCAB_SIZE = 32000
20
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
21
  INSTRUCT_DATASET = "nroggendorff/elephant"
22
  OUTPUT_REPO = "nroggendorff/smallama"
23
  INSTRUCT_FINETUNE_BOOL = False
24
+ INIT = 0
25
+ SHARD_SIZE = int(15e+5)
26
  FP16 = True
27
+ WARMUP_STEPS = 200
28
+ WEIGHT_DECAY = 1e-5
29
  GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // 4
30
  PUSH_TO_HUB = True
 
31
 
32
  def load_data():
33
  if not INSTRUCT_FINETUNE_BOOL: