tangledgroup/tangled-alpha-0.9-core

@@ -63,7 +63,7 @@ train:
   global_batch_size: 32
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 1
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 0

   global_batch_size: 32
   # Number of samples per data-parallel rank (type: int, default: 4)
+  micro_batch_size: 4
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 0