ngwgsang commited on
Commit
871055a
·
verified ·
1 Parent(s): 95fb66e

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a87059de773a647f0f146c9c6d0b08d5535f799d9fa1c34e6aec16f5407fc67c
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf0c8d4acae67ad3b83cbd4e674eafe4b5a6c8d746be9c99d1ef727781796bb5
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7994e3322be09b5f88a7dbc2a5814c66fcb49eac3f6f61e2db6e5b2dd3f344e8
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41230f95650a3fed4682e423dc55159f44a49adabeba81e64b3b775c35abfd0c
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:473ecb09e5f106de8046a76cc9b1107489610a4ca8d22c8acd37629ea6ee333c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff6cc6d0cbab2c0d1b846e878224dd13dea331cdbd300a43f50d003878abb4b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2068a5d955976d9311723e9deb7bf5d0b82ab774ba392808af341e32685de248
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1c1820d44ac0531e8dbb97bd434cc82c577f59ee0c117b537ddaef94516da4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 5.272278467814128,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 5496,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -457,6 +457,88 @@
457
  "eval_samples_per_second": 270.548,
458
  "eval_steps_per_second": 8.456,
459
  "step": 5496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "logging_steps": 100,
@@ -476,7 +558,7 @@
476
  "attributes": {}
477
  }
478
  },
479
- "total_flos": 1.1566597400031744e+16,
480
  "train_batch_size": 32,
481
  "trial_name": null,
482
  "trial_params": null
 
1
  {
2
  "best_metric": 5.272278467814128,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 6412,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
457
  "eval_samples_per_second": 270.548,
458
  "eval_steps_per_second": 8.456,
459
  "step": 5496
460
+ },
461
+ {
462
+ "epoch": 6.004366812227074,
463
+ "grad_norm": 29.006881713867188,
464
+ "learning_rate": 7.483624454148472e-06,
465
+ "loss": 4.8767,
466
+ "step": 5500
467
+ },
468
+ {
469
+ "epoch": 6.11353711790393,
470
+ "grad_norm": 29.40102195739746,
471
+ "learning_rate": 7.074235807860262e-06,
472
+ "loss": 4.6832,
473
+ "step": 5600
474
+ },
475
+ {
476
+ "epoch": 6.222707423580786,
477
+ "grad_norm": 32.63214111328125,
478
+ "learning_rate": 6.664847161572053e-06,
479
+ "loss": 4.6676,
480
+ "step": 5700
481
+ },
482
+ {
483
+ "epoch": 6.331877729257642,
484
+ "grad_norm": 32.83290481567383,
485
+ "learning_rate": 6.2554585152838425e-06,
486
+ "loss": 4.6235,
487
+ "step": 5800
488
+ },
489
+ {
490
+ "epoch": 6.441048034934497,
491
+ "grad_norm": 36.680030822753906,
492
+ "learning_rate": 5.846069868995633e-06,
493
+ "loss": 4.4681,
494
+ "step": 5900
495
+ },
496
+ {
497
+ "epoch": 6.550218340611353,
498
+ "grad_norm": 37.740535736083984,
499
+ "learning_rate": 5.436681222707424e-06,
500
+ "loss": 4.518,
501
+ "step": 6000
502
+ },
503
+ {
504
+ "epoch": 6.6593886462882095,
505
+ "grad_norm": 33.8775749206543,
506
+ "learning_rate": 5.027292576419214e-06,
507
+ "loss": 4.6268,
508
+ "step": 6100
509
+ },
510
+ {
511
+ "epoch": 6.7685589519650655,
512
+ "grad_norm": 50.033729553222656,
513
+ "learning_rate": 4.617903930131005e-06,
514
+ "loss": 4.6026,
515
+ "step": 6200
516
+ },
517
+ {
518
+ "epoch": 6.877729257641922,
519
+ "grad_norm": 34.454341888427734,
520
+ "learning_rate": 4.208515283842795e-06,
521
+ "loss": 4.618,
522
+ "step": 6300
523
+ },
524
+ {
525
+ "epoch": 6.986899563318778,
526
+ "grad_norm": 41.41761779785156,
527
+ "learning_rate": 3.799126637554585e-06,
528
+ "loss": 4.6299,
529
+ "step": 6400
530
+ },
531
+ {
532
+ "epoch": 7.0,
533
+ "eval_avg_mae": 5.449769337972005,
534
+ "eval_loss": 5.449769496917725,
535
+ "eval_mae_lex": 4.877564430236816,
536
+ "eval_mae_sem": 3.8569977283477783,
537
+ "eval_mae_syn": 7.614744663238525,
538
+ "eval_runtime": 27.0562,
539
+ "eval_samples_per_second": 270.806,
540
+ "eval_steps_per_second": 8.464,
541
+ "step": 6412
542
  }
543
  ],
544
  "logging_steps": 100,
 
558
  "attributes": {}
559
  }
560
  },
561
+ "total_flos": 1.3494363633370368e+16,
562
  "train_batch_size": 32,
563
  "trial_name": null,
564
  "trial_params": null