Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +178 -3
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1852600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c004f34c54fd1f3daf8e261d84a7a1757591cd731ccc03e414920ea5c14307b
|
| 3 |
size 1852600
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1108346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:851725364a78e2688d08b65ac507b37eb1cd1824bb79778d93a924c368ade572
|
| 3 |
size 1108346
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:471500541aec9e4570286570c42981a081703fc6b0eca9480f56cf2521c795bd
|
| 3 |
size 14244
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69c5572f1342b41c6a930522d275b4fa95be59a8658b646079d52144a96dd33c
|
| 3 |
size 1064
|
trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -10507,6 +10507,181 @@
|
|
| 10507 |
"learning_rate": 3.934426229508197e-06,
|
| 10508 |
"loss": 0.1866,
|
| 10509 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10510 |
}
|
| 10511 |
],
|
| 10512 |
"logging_steps": 20,
|
|
@@ -10526,7 +10701,7 @@
|
|
| 10526 |
"attributes": {}
|
| 10527 |
}
|
| 10528 |
},
|
| 10529 |
-
"total_flos": 3.
|
| 10530 |
"train_batch_size": 1,
|
| 10531 |
"trial_name": null,
|
| 10532 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 148.87126296522268,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 30500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 10507 |
"learning_rate": 3.934426229508197e-06,
|
| 10508 |
"loss": 0.1866,
|
| 10509 |
"step": 30000
|
| 10510 |
+
},
|
| 10511 |
+
{
|
| 10512 |
+
"epoch": 146.52837095790116,
|
| 10513 |
+
"grad_norm": 2.8058741092681885,
|
| 10514 |
+
"learning_rate": 3.8032786885245906e-06,
|
| 10515 |
+
"loss": 0.1751,
|
| 10516 |
+
"step": 30020
|
| 10517 |
+
},
|
| 10518 |
+
{
|
| 10519 |
+
"epoch": 146.62599145820622,
|
| 10520 |
+
"grad_norm": 2.752978563308716,
|
| 10521 |
+
"learning_rate": 3.672131147540984e-06,
|
| 10522 |
+
"loss": 0.1826,
|
| 10523 |
+
"step": 30040
|
| 10524 |
+
},
|
| 10525 |
+
{
|
| 10526 |
+
"epoch": 146.7236119585113,
|
| 10527 |
+
"grad_norm": 3.0315961837768555,
|
| 10528 |
+
"learning_rate": 3.540983606557377e-06,
|
| 10529 |
+
"loss": 0.1623,
|
| 10530 |
+
"step": 30060
|
| 10531 |
+
},
|
| 10532 |
+
{
|
| 10533 |
+
"epoch": 146.82123245881635,
|
| 10534 |
+
"grad_norm": 3.3782765865325928,
|
| 10535 |
+
"learning_rate": 3.409836065573771e-06,
|
| 10536 |
+
"loss": 0.1784,
|
| 10537 |
+
"step": 30080
|
| 10538 |
+
},
|
| 10539 |
+
{
|
| 10540 |
+
"epoch": 146.9188529591214,
|
| 10541 |
+
"grad_norm": 2.961002826690674,
|
| 10542 |
+
"learning_rate": 3.278688524590164e-06,
|
| 10543 |
+
"loss": 0.1764,
|
| 10544 |
+
"step": 30100
|
| 10545 |
+
},
|
| 10546 |
+
{
|
| 10547 |
+
"epoch": 147.0164734594265,
|
| 10548 |
+
"grad_norm": 3.775484561920166,
|
| 10549 |
+
"learning_rate": 3.1475409836065574e-06,
|
| 10550 |
+
"loss": 0.1595,
|
| 10551 |
+
"step": 30120
|
| 10552 |
+
},
|
| 10553 |
+
{
|
| 10554 |
+
"epoch": 147.11409395973155,
|
| 10555 |
+
"grad_norm": 4.18531608581543,
|
| 10556 |
+
"learning_rate": 3.016393442622951e-06,
|
| 10557 |
+
"loss": 0.1599,
|
| 10558 |
+
"step": 30140
|
| 10559 |
+
},
|
| 10560 |
+
{
|
| 10561 |
+
"epoch": 147.2117144600366,
|
| 10562 |
+
"grad_norm": 2.9723432064056396,
|
| 10563 |
+
"learning_rate": 2.8852459016393446e-06,
|
| 10564 |
+
"loss": 0.159,
|
| 10565 |
+
"step": 30160
|
| 10566 |
+
},
|
| 10567 |
+
{
|
| 10568 |
+
"epoch": 147.30933496034166,
|
| 10569 |
+
"grad_norm": 3.2833070755004883,
|
| 10570 |
+
"learning_rate": 2.754098360655738e-06,
|
| 10571 |
+
"loss": 0.1657,
|
| 10572 |
+
"step": 30180
|
| 10573 |
+
},
|
| 10574 |
+
{
|
| 10575 |
+
"epoch": 147.40695546064674,
|
| 10576 |
+
"grad_norm": 3.4174959659576416,
|
| 10577 |
+
"learning_rate": 2.6229508196721314e-06,
|
| 10578 |
+
"loss": 0.175,
|
| 10579 |
+
"step": 30200
|
| 10580 |
+
},
|
| 10581 |
+
{
|
| 10582 |
+
"epoch": 147.5045759609518,
|
| 10583 |
+
"grad_norm": 3.3127195835113525,
|
| 10584 |
+
"learning_rate": 2.491803278688525e-06,
|
| 10585 |
+
"loss": 0.1815,
|
| 10586 |
+
"step": 30220
|
| 10587 |
+
},
|
| 10588 |
+
{
|
| 10589 |
+
"epoch": 147.60219646125685,
|
| 10590 |
+
"grad_norm": 3.7137949466705322,
|
| 10591 |
+
"learning_rate": 2.360655737704918e-06,
|
| 10592 |
+
"loss": 0.198,
|
| 10593 |
+
"step": 30240
|
| 10594 |
+
},
|
| 10595 |
+
{
|
| 10596 |
+
"epoch": 147.69981696156194,
|
| 10597 |
+
"grad_norm": 2.630924701690674,
|
| 10598 |
+
"learning_rate": 2.2295081967213117e-06,
|
| 10599 |
+
"loss": 0.1687,
|
| 10600 |
+
"step": 30260
|
| 10601 |
+
},
|
| 10602 |
+
{
|
| 10603 |
+
"epoch": 147.797437461867,
|
| 10604 |
+
"grad_norm": 3.330245018005371,
|
| 10605 |
+
"learning_rate": 2.098360655737705e-06,
|
| 10606 |
+
"loss": 0.1581,
|
| 10607 |
+
"step": 30280
|
| 10608 |
+
},
|
| 10609 |
+
{
|
| 10610 |
+
"epoch": 147.89505796217205,
|
| 10611 |
+
"grad_norm": 3.237410068511963,
|
| 10612 |
+
"learning_rate": 1.9672131147540985e-06,
|
| 10613 |
+
"loss": 0.2022,
|
| 10614 |
+
"step": 30300
|
| 10615 |
+
},
|
| 10616 |
+
{
|
| 10617 |
+
"epoch": 147.99267846247713,
|
| 10618 |
+
"grad_norm": 2.633331537246704,
|
| 10619 |
+
"learning_rate": 1.836065573770492e-06,
|
| 10620 |
+
"loss": 0.1613,
|
| 10621 |
+
"step": 30320
|
| 10622 |
+
},
|
| 10623 |
+
{
|
| 10624 |
+
"epoch": 148.09029896278219,
|
| 10625 |
+
"grad_norm": 2.527902603149414,
|
| 10626 |
+
"learning_rate": 1.7049180327868855e-06,
|
| 10627 |
+
"loss": 0.1749,
|
| 10628 |
+
"step": 30340
|
| 10629 |
+
},
|
| 10630 |
+
{
|
| 10631 |
+
"epoch": 148.18791946308724,
|
| 10632 |
+
"grad_norm": 2.9230234622955322,
|
| 10633 |
+
"learning_rate": 1.5737704918032787e-06,
|
| 10634 |
+
"loss": 0.1464,
|
| 10635 |
+
"step": 30360
|
| 10636 |
+
},
|
| 10637 |
+
{
|
| 10638 |
+
"epoch": 148.28553996339232,
|
| 10639 |
+
"grad_norm": 2.591038703918457,
|
| 10640 |
+
"learning_rate": 1.4426229508196723e-06,
|
| 10641 |
+
"loss": 0.1819,
|
| 10642 |
+
"step": 30380
|
| 10643 |
+
},
|
| 10644 |
+
{
|
| 10645 |
+
"epoch": 148.38316046369738,
|
| 10646 |
+
"grad_norm": 3.6826913356781006,
|
| 10647 |
+
"learning_rate": 1.3114754098360657e-06,
|
| 10648 |
+
"loss": 0.1909,
|
| 10649 |
+
"step": 30400
|
| 10650 |
+
},
|
| 10651 |
+
{
|
| 10652 |
+
"epoch": 148.48078096400243,
|
| 10653 |
+
"grad_norm": 3.1828205585479736,
|
| 10654 |
+
"learning_rate": 1.180327868852459e-06,
|
| 10655 |
+
"loss": 0.1727,
|
| 10656 |
+
"step": 30420
|
| 10657 |
+
},
|
| 10658 |
+
{
|
| 10659 |
+
"epoch": 148.57840146430752,
|
| 10660 |
+
"grad_norm": 3.3356974124908447,
|
| 10661 |
+
"learning_rate": 1.0491803278688525e-06,
|
| 10662 |
+
"loss": 0.1624,
|
| 10663 |
+
"step": 30440
|
| 10664 |
+
},
|
| 10665 |
+
{
|
| 10666 |
+
"epoch": 148.67602196461257,
|
| 10667 |
+
"grad_norm": 3.1692721843719482,
|
| 10668 |
+
"learning_rate": 9.18032786885246e-07,
|
| 10669 |
+
"loss": 0.1769,
|
| 10670 |
+
"step": 30460
|
| 10671 |
+
},
|
| 10672 |
+
{
|
| 10673 |
+
"epoch": 148.77364246491763,
|
| 10674 |
+
"grad_norm": 2.968018054962158,
|
| 10675 |
+
"learning_rate": 7.868852459016393e-07,
|
| 10676 |
+
"loss": 0.1594,
|
| 10677 |
+
"step": 30480
|
| 10678 |
+
},
|
| 10679 |
+
{
|
| 10680 |
+
"epoch": 148.87126296522268,
|
| 10681 |
+
"grad_norm": 3.693136692047119,
|
| 10682 |
+
"learning_rate": 6.557377049180328e-07,
|
| 10683 |
+
"loss": 0.1927,
|
| 10684 |
+
"step": 30500
|
| 10685 |
}
|
| 10686 |
],
|
| 10687 |
"logging_steps": 20,
|
|
|
|
| 10701 |
"attributes": {}
|
| 10702 |
}
|
| 10703 |
},
|
| 10704 |
+
"total_flos": 3.924112697660375e+17,
|
| 10705 |
"train_batch_size": 1,
|
| 10706 |
"trial_name": null,
|
| 10707 |
"trial_params": null
|