Training in progress, epoch 8, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 442668636
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0195e95b07a008ca0adb918c3cf710d2dd35e5922e1076eded57fcb47ec29bc3
|
3 |
size 442668636
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 885457146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ed50d1f812bf88be9e3a3ef60b44e88993ed9aab461dcd502248ba4e966d769
|
3 |
size 885457146
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea9f1a22dcc6b48df80d64487fe2cb15b598edf3b547bbcd4fb8ed5b43754655
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4da94ddd87a3dbe17d3a75825e2b60c2edfc2fd36631a373db6346d564b8b7bd
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 5.
|
3 |
-
"best_model_checkpoint": "./results/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -539,6 +539,81 @@
|
|
539 |
"eval_samples_per_second": 269.074,
|
540 |
"eval_steps_per_second": 8.41,
|
541 |
"step": 6412
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
542 |
}
|
543 |
],
|
544 |
"logging_steps": 100,
|
@@ -553,12 +628,12 @@
|
|
553 |
"should_evaluate": false,
|
554 |
"should_log": false,
|
555 |
"should_save": true,
|
556 |
-
"should_training_stop":
|
557 |
},
|
558 |
"attributes": {}
|
559 |
}
|
560 |
},
|
561 |
-
"total_flos": 1.
|
562 |
"train_batch_size": 32,
|
563 |
"trial_name": null,
|
564 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 5.320092519124349,
|
3 |
+
"best_model_checkpoint": "./results/checkpoint-7328",
|
4 |
+
"epoch": 8.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 7328,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
539 |
"eval_samples_per_second": 269.074,
|
540 |
"eval_steps_per_second": 8.41,
|
541 |
"step": 6412
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"epoch": 7.096069868995633,
|
545 |
+
"grad_norm": 30.43601417541504,
|
546 |
+
"learning_rate": 5.649563318777293e-06,
|
547 |
+
"loss": 4.4245,
|
548 |
+
"step": 6500
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 7.205240174672489,
|
552 |
+
"grad_norm": 32.60799789428711,
|
553 |
+
"learning_rate": 4.967248908296943e-06,
|
554 |
+
"loss": 4.2516,
|
555 |
+
"step": 6600
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 7.314410480349345,
|
559 |
+
"grad_norm": 32.61433792114258,
|
560 |
+
"learning_rate": 4.284934497816594e-06,
|
561 |
+
"loss": 4.2598,
|
562 |
+
"step": 6700
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 7.423580786026201,
|
566 |
+
"grad_norm": 28.708969116210938,
|
567 |
+
"learning_rate": 3.6026200873362447e-06,
|
568 |
+
"loss": 4.1993,
|
569 |
+
"step": 6800
|
570 |
+
},
|
571 |
+
{
|
572 |
+
"epoch": 7.532751091703057,
|
573 |
+
"grad_norm": 32.070068359375,
|
574 |
+
"learning_rate": 2.920305676855895e-06,
|
575 |
+
"loss": 4.2718,
|
576 |
+
"step": 6900
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"epoch": 7.641921397379913,
|
580 |
+
"grad_norm": 32.91472625732422,
|
581 |
+
"learning_rate": 2.237991266375546e-06,
|
582 |
+
"loss": 4.3288,
|
583 |
+
"step": 7000
|
584 |
+
},
|
585 |
+
{
|
586 |
+
"epoch": 7.751091703056769,
|
587 |
+
"grad_norm": 26.058467864990234,
|
588 |
+
"learning_rate": 1.5556768558951965e-06,
|
589 |
+
"loss": 4.2426,
|
590 |
+
"step": 7100
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 7.860262008733624,
|
594 |
+
"grad_norm": 26.022476196289062,
|
595 |
+
"learning_rate": 8.733624454148472e-07,
|
596 |
+
"loss": 4.3651,
|
597 |
+
"step": 7200
|
598 |
+
},
|
599 |
+
{
|
600 |
+
"epoch": 7.96943231441048,
|
601 |
+
"grad_norm": 37.77171325683594,
|
602 |
+
"learning_rate": 1.910480349344978e-07,
|
603 |
+
"loss": 4.1779,
|
604 |
+
"step": 7300
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"epoch": 8.0,
|
608 |
+
"eval_avg_mae": 5.320092519124349,
|
609 |
+
"eval_loss": 5.320092678070068,
|
610 |
+
"eval_mae_lex": 4.7238664627075195,
|
611 |
+
"eval_mae_sem": 3.55670166015625,
|
612 |
+
"eval_mae_syn": 7.6797099113464355,
|
613 |
+
"eval_runtime": 27.2585,
|
614 |
+
"eval_samples_per_second": 268.797,
|
615 |
+
"eval_steps_per_second": 8.401,
|
616 |
+
"step": 7328
|
617 |
}
|
618 |
],
|
619 |
"logging_steps": 100,
|
|
|
628 |
"should_evaluate": false,
|
629 |
"should_log": false,
|
630 |
"should_save": true,
|
631 |
+
"should_training_stop": true
|
632 |
},
|
633 |
"attributes": {}
|
634 |
}
|
635 |
},
|
636 |
+
"total_flos": 1.5422129866708992e+16,
|
637 |
"train_batch_size": 32,
|
638 |
"trial_name": null,
|
639 |
"trial_params": null
|