ngwgsang committed d07e1d7 (verified) · Parent(s): a430b26

Training in progress, epoch 5, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e61b427c7e90098665041fc0a0a842c2a81a5d94700dec207f9f18e8fa3e68fe
+oid sha256:1dab21a3309f43892efdf6d32d3eb2a83f87740fc40912ec53fff44ce1639a8b
 size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af3007c1d17883475830f0951448eb8859176b25fee74d22b9b7c1cb61d5b54a
+oid sha256:60ff253a82aaff7be91301625082eac8e395a14f23fc079cb32647b9b6e5388e
 size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:049c6f40a328629846cab1b27e3807d44ea469304a69ff0f3d676cc813cde6b3
+oid sha256:b5dc815408d1d7abfa9ba6c0dc7793cae2a75c860d693e06433fbcdfbd86b13d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7668389514d10a3d53f140c85ff46df71dcd9dc34fbc1ed6530f2d1a175df2a0
+oid sha256:d384c207c19b3bb34dc652f0d12803de0a7eba856a942818f2ab5a8834f9006c
 size 1064
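
All four files above are Git LFS pointers, so this commit only swaps each pointer's sha256 object id; the recorded sizes are unchanged. As a minimal sketch (standard library only; the paths in the usage comment are hypothetical), a resolved checkpoint file can be checked against its pointer like this:

import hashlib
from pathlib import Path

def parse_pointer(text: str) -> dict:
    # A Git LFS pointer is a short "key value" text file:
    #   version https://git-lfs.github.com/spec/v1
    #   oid sha256:<hex digest>
    #   size <bytes>
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def matches_pointer(pointer_path: str, blob_path: str) -> bool:
    # True if the resolved blob has the oid and size recorded in the pointer.
    fields = parse_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Hypothetical usage, e.g. after `git lfs pull`:
# matches_pointer("model.safetensors.pointer", "last-checkpoint/model.safetensors")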
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 5.645811716715495,
-  "best_model_checkpoint": "./results/checkpoint-2748",
-  "epoch": 4.0,
+  "best_metric": 5.481770197550456,
+  "best_model_checkpoint": "./results/checkpoint-4580",
+  "epoch": 5.0,
   "eval_steps": 500,
-  "global_step": 3664,
+  "global_step": 4580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -307,6 +307,81 @@
       "eval_samples_per_second": 269.988,
       "eval_steps_per_second": 8.438,
       "step": 3664
+    },
+    {
+      "epoch": 4.039301310043668,
+      "grad_norm": 37.76191329956055,
+      "learning_rate": 1.4852620087336245e-05,
+      "loss": 5.4072,
+      "step": 3700
+    },
+    {
+      "epoch": 4.148471615720524,
+      "grad_norm": 33.29827117919922,
+      "learning_rate": 1.4443231441048035e-05,
+      "loss": 5.0985,
+      "step": 3800
+    },
+    {
+      "epoch": 4.25764192139738,
+      "grad_norm": 47.478206634521484,
+      "learning_rate": 1.4033842794759826e-05,
+      "loss": 5.1541,
+      "step": 3900
+    },
+    {
+      "epoch": 4.366812227074236,
+      "grad_norm": 31.66642189025879,
+      "learning_rate": 1.3624454148471617e-05,
+      "loss": 5.106,
+      "step": 4000
+    },
+    {
+      "epoch": 4.475982532751091,
+      "grad_norm": 27.389015197753906,
+      "learning_rate": 1.3215065502183406e-05,
+      "loss": 5.1793,
+      "step": 4100
+    },
+    {
+      "epoch": 4.585152838427947,
+      "grad_norm": 26.702226638793945,
+      "learning_rate": 1.2805676855895198e-05,
+      "loss": 5.0975,
+      "step": 4200
+    },
+    {
+      "epoch": 4.6943231441048034,
+      "grad_norm": 31.537691116333008,
+      "learning_rate": 1.2396288209606987e-05,
+      "loss": 4.9613,
+      "step": 4300
+    },
+    {
+      "epoch": 4.8034934497816595,
+      "grad_norm": 26.946945190429688,
+      "learning_rate": 1.1986899563318778e-05,
+      "loss": 5.1799,
+      "step": 4400
+    },
+    {
+      "epoch": 4.9126637554585155,
+      "grad_norm": 27.92361068725586,
+      "learning_rate": 1.1577510917030569e-05,
+      "loss": 5.0393,
+      "step": 4500
+    },
+    {
+      "epoch": 5.0,
+      "eval_avg_mae": 5.481770197550456,
+      "eval_loss": 5.481770038604736,
+      "eval_mae_lex": 4.809901714324951,
+      "eval_mae_sem": 3.8779022693634033,
+      "eval_mae_syn": 7.757506370544434,
+      "eval_runtime": 27.0994,
+      "eval_samples_per_second": 270.375,
+      "eval_steps_per_second": 8.45,
+      "step": 4580
     }
   ],
   "logging_steps": 100,
@@ -326,7 +401,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7711064933354496.0,
+  "total_flos": 9638831166693120.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null