nbroad committed on
Commit
d5b118c
·
verified ·
1 Parent(s): 8cc8794

Training in progress, step 280

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abb088b155560be0e8a8bf00afcaf4a90e2ea16f2794a67206263cafc621757c
3
  size 174655536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb006299516fc0ae808037a652e46dedb7eb19af599c01dac67689022b14def
3
  size 174655536
wandb/run-20250202_235451-rfjfhgaw/files/output.log CHANGED
@@ -214,3 +214,16 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applie
214
  {'loss': 0.126, 'grad_norm': 1.300278663635254, 'learning_rate': 8.687862490269232e-06, 'epoch': 0.72}
215
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
216
  {'eval_loss': 0.15854938328266144, 'eval_runtime': 23.1914, 'eval_samples_per_second': 21.56, 'eval_steps_per_second': 0.906, 'epoch': 0.72}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  {'loss': 0.126, 'grad_norm': 1.300278663635254, 'learning_rate': 8.687862490269232e-06, 'epoch': 0.72}
215
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
216
  {'eval_loss': 0.15854938328266144, 'eval_runtime': 23.1914, 'eval_samples_per_second': 21.56, 'eval_steps_per_second': 0.906, 'epoch': 0.72}
217
+ 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 280/360 [2:19:08<36:48, 27.61s/it]/usr/local/lib/python3.11/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
218
+ {'loss': 0.1235, 'grad_norm': 1.0385386943817139, 'learning_rate': 8.370158574239466e-06, 'epoch': 0.73}
219
+ {'loss': 0.138, 'grad_norm': 1.8568216562271118, 'learning_rate': 8.056828165944282e-06, 'epoch': 0.73}
220
+ {'loss': 0.1387, 'grad_norm': 1.05785071849823, 'learning_rate': 7.747989096135943e-06, 'epoch': 0.74}
221
+ {'loss': 0.1348, 'grad_norm': 1.2209707498550415, 'learning_rate': 7.443757506558033e-06, 'epoch': 0.74}
222
+ {'loss': 0.1436, 'grad_norm': 2.2911832332611084, 'learning_rate': 7.1442478062692135e-06, 'epoch': 0.75}
223
+ {'loss': 0.1276, 'grad_norm': 1.4366707801818848, 'learning_rate': 6.84957262861873e-06, 'epoch': 0.76}
224
+ {'loss': 0.132, 'grad_norm': 1.3975704908370972, 'learning_rate': 6.559842788889552e-06, 'epoch': 0.76}
225
+ {'loss': 0.121, 'grad_norm': 1.098695993423462, 'learning_rate': 6.275167242625331e-06, 'epoch': 0.77}
226
+ {'loss': 0.1347, 'grad_norm': 2.457047939300537, 'learning_rate': 5.9956530446566305e-06, 'epoch': 0.77}
227
+ {'loss': 0.1298, 'grad_norm': 0.8969805240631104, 'learning_rate': 5.721405308842023e-06, 'epoch': 0.78}
228
+ with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
229
+ {'eval_loss': 0.15774241089820862, 'eval_runtime': 23.2641, 'eval_samples_per_second': 21.492, 'eval_steps_per_second': 0.903, 'epoch': 0.78}
wandb/run-20250202_235451-rfjfhgaw/run-rfjfhgaw.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfccf5e10dd8333a6d34f7307f0deefbda33a25168df58e52f7a9dc615a98be1
3
- size 3014656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe02a8ca2c36e8605f62acdb0cf92b568b0b38b835147a73473da21c6d33ab0
3
+ size 3244032