Training in progress, step 2600, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:60a8247fd6ba1f9fe0dbc2ab7765133457470ca6d3a44c71bf94f3d4c10f9e4b
  size 58680
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:ba646ce7e6e999342a8ea23619ed724cecef86f809b7b64b8e29bc9e76ab814c
  size 127270
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:da464f1ab8886439566680dd97216fa326d723519bedac0470d5e0944caeac13
  size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:c97ae609b712bd37f007cb4326617d16b954d56d87dcc6a35ec5b0ba67ade88b
  size 1064
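Each of the four files above is stored through Git LFS: the commit only touches the three-line pointer (version, oid, size), while the binary payload lives in LFS storage. After downloading a checkpoint file, the pointer's oid can be recomputed locally to confirm the payload is intact. A minimal Python sketch, assuming the file sits at the repo-relative path shown in the diff:

import hashlib

def lfs_sha256(path, chunk_size=1 << 20):
    # Stream the file; Git LFS records this digest as "oid sha256:<hex>".
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Compare against the pointer committed for adapter_model.safetensors.
expected = "60a8247fd6ba1f9fe0dbc2ab7765133457470ca6d3a44c71bf94f3d4c10f9e4b"
assert lfs_sha256("last-checkpoint/adapter_model.safetensors") == expected

The pointer's size field (58680 bytes for the adapter) gives a cheaper first check via os.path.getsize before hashing.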
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 10.
- "best_model_checkpoint": "miner_id_24/checkpoint-
- "epoch": 1.
+ "best_metric": 10.27136516571045,
+ "best_model_checkpoint": "miner_id_24/checkpoint-2600",
+ "epoch": 1.7828434313137373,
  "eval_steps": 100,
- "global_step":
+ "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -17715,6 +17715,714 @@
  "eval_samples_per_second": 335.868,
  "eval_steps_per_second": 84.096,
  "step": 2500
+ },
+ {
+ "epoch": 1.7149712914559945,
+ "grad_norm": 0.6548197865486145,
+ "learning_rate": 1.607647829374337e-06,
+ "loss": 82.2205,
+ "step": 2501
+ },
+ {
+ "epoch": 1.7156568686262748,
+ "grad_norm": 0.6493549942970276,
+ "learning_rate": 1.5864812845490574e-06,
+ "loss": 82.2224,
+ "step": 2502
+ },
+ {
+ "epoch": 1.7163424457965548,
+ "grad_norm": 0.6554645895957947,
+ "learning_rate": 1.5654538915235051e-06,
+ "loss": 82.2281,
+ "step": 2503
+ },
+ {
+ "epoch": 1.717028022966835,
+ "grad_norm": 0.7031847834587097,
+ "learning_rate": 1.5445656800293751e-06,
+ "loss": 82.2184,
+ "step": 2504
+ },
+ {
+ "epoch": 1.7177136001371154,
+ "grad_norm": 0.6444084048271179,
+ "learning_rate": 1.5238166796015308e-06,
+ "loss": 82.2735,
+ "step": 2505
+ },
+ {
+ "epoch": 1.7183991773073957,
+ "grad_norm": 0.643793523311615,
+ "learning_rate": 1.5032069195780374e-06,
+ "loss": 82.1628,
+ "step": 2506
+ },
+ {
+ "epoch": 1.719084754477676,
+ "grad_norm": 0.8285008668899536,
+ "learning_rate": 1.4827364291000735e-06,
+ "loss": 82.1914,
+ "step": 2507
+ },
+ {
+ "epoch": 1.7197703316479562,
+ "grad_norm": 0.7288245558738708,
+ "learning_rate": 1.4624052371118634e-06,
+ "loss": 82.2586,
+ "step": 2508
+ },
+ {
+ "epoch": 1.7204559088182365,
+ "grad_norm": 0.753436803817749,
+ "learning_rate": 1.4422133723607233e-06,
+ "loss": 82.2104,
+ "step": 2509
+ },
+ {
+ "epoch": 1.7211414859885166,
+ "grad_norm": 0.7888475060462952,
+ "learning_rate": 1.4221608633969263e-06,
+ "loss": 82.2759,
+ "step": 2510
+ },
+ {
+ "epoch": 1.7218270631587969,
+ "grad_norm": 0.8458419442176819,
+ "learning_rate": 1.4022477385737365e-06,
+ "loss": 82.269,
+ "step": 2511
+ },
+ {
+ "epoch": 1.722512640329077,
+ "grad_norm": 0.7998586297035217,
+ "learning_rate": 1.382474026047298e-06,
+ "loss": 82.2513,
+ "step": 2512
+ },
+ {
+ "epoch": 1.7231982174993572,
+ "grad_norm": 0.6696304678916931,
+ "learning_rate": 1.3628397537766568e-06,
+ "loss": 82.1494,
+ "step": 2513
+ },
+ {
+ "epoch": 1.7238837946696375,
+ "grad_norm": 0.6818397045135498,
+ "learning_rate": 1.343344949523706e-06,
+ "loss": 82.2001,
+ "step": 2514
+ },
+ {
+ "epoch": 1.7245693718399178,
+ "grad_norm": 0.6361696124076843,
+ "learning_rate": 1.323989640853107e-06,
+ "loss": 82.2448,
+ "step": 2515
+ },
+ {
+ "epoch": 1.725254949010198,
+ "grad_norm": 0.6482613682746887,
+ "learning_rate": 1.304773855132313e-06,
+ "loss": 82.2519,
+ "step": 2516
+ },
+ {
+ "epoch": 1.7259405261804783,
+ "grad_norm": 0.7171697616577148,
+ "learning_rate": 1.2856976195314784e-06,
+ "loss": 82.1872,
+ "step": 2517
+ },
+ {
+ "epoch": 1.7266261033507584,
+ "grad_norm": 0.6757230758666992,
+ "learning_rate": 1.26676096102345e-06,
+ "loss": 82.2956,
+ "step": 2518
+ },
+ {
+ "epoch": 1.7273116805210387,
+ "grad_norm": 0.6487752795219421,
+ "learning_rate": 1.24796390638372e-06,
+ "loss": 82.2076,
+ "step": 2519
+ },
+ {
+ "epoch": 1.7279972576913187,
+ "grad_norm": 0.7632609009742737,
+ "learning_rate": 1.2293064821903845e-06,
+ "loss": 82.1435,
+ "step": 2520
+ },
+ {
+ "epoch": 1.728682834861599,
+ "grad_norm": 0.614216148853302,
+ "learning_rate": 1.210788714824096e-06,
+ "loss": 82.2062,
+ "step": 2521
+ },
+ {
+ "epoch": 1.7293684120318793,
+ "grad_norm": 0.837080717086792,
+ "learning_rate": 1.192410630468066e-06,
+ "loss": 82.2647,
+ "step": 2522
+ },
+ {
+ "epoch": 1.7300539892021596,
+ "grad_norm": 0.6004711985588074,
+ "learning_rate": 1.1741722551079859e-06,
+ "loss": 82.2275,
+ "step": 2523
+ },
+ {
+ "epoch": 1.7307395663724399,
+ "grad_norm": 0.692437469959259,
+ "learning_rate": 1.1560736145320161e-06,
+ "loss": 82.1699,
+ "step": 2524
+ },
+ {
+ "epoch": 1.7314251435427201,
+ "grad_norm": 0.9786331057548523,
+ "learning_rate": 1.1381147343307086e-06,
+ "loss": 82.1797,
+ "step": 2525
+ },
+ {
+ "epoch": 1.7321107207130002,
+ "grad_norm": 0.7589514851570129,
+ "learning_rate": 1.1202956398970287e-06,
+ "loss": 82.2553,
+ "step": 2526
+ },
+ {
+ "epoch": 1.7327962978832805,
+ "grad_norm": 0.7883815765380859,
+ "learning_rate": 1.1026163564263003e-06,
+ "loss": 82.2521,
+ "step": 2527
+ },
+ {
+ "epoch": 1.7334818750535606,
+ "grad_norm": 0.6983951330184937,
+ "learning_rate": 1.0850769089161384e-06,
+ "loss": 82.2064,
+ "step": 2528
+ },
+ {
+ "epoch": 1.7341674522238408,
+ "grad_norm": 0.8901423215866089,
+ "learning_rate": 1.0676773221664382e-06,
+ "loss": 82.2177,
+ "step": 2529
+ },
+ {
+ "epoch": 1.7348530293941211,
+ "grad_norm": 0.6684783697128296,
+ "learning_rate": 1.0504176207793649e-06,
+ "loss": 82.2156,
+ "step": 2530
+ },
+ {
+ "epoch": 1.7355386065644014,
+ "grad_norm": 0.7410807013511658,
+ "learning_rate": 1.0332978291592631e-06,
+ "loss": 82.1716,
+ "step": 2531
+ },
+ {
+ "epoch": 1.7362241837346817,
+ "grad_norm": 0.764433741569519,
+ "learning_rate": 1.0163179715126593e-06,
+ "loss": 82.1475,
+ "step": 2532
+ },
+ {
+ "epoch": 1.736909760904962,
+ "grad_norm": 0.7061108946800232,
+ "learning_rate": 9.994780718482367e-07,
+ "loss": 82.2915,
+ "step": 2533
+ },
+ {
+ "epoch": 1.7375953380752422,
+ "grad_norm": 0.8760568499565125,
+ "learning_rate": 9.827781539767488e-07,
+ "loss": 82.2156,
+ "step": 2534
+ },
+ {
+ "epoch": 1.7382809152455223,
+ "grad_norm": 0.6002421975135803,
+ "learning_rate": 9.662182415110632e-07,
+ "loss": 82.1607,
+ "step": 2535
+ },
+ {
+ "epoch": 1.7389664924158026,
+ "grad_norm": 0.6147943735122681,
+ "learning_rate": 9.49798357866083e-07,
+ "loss": 82.2697,
+ "step": 2536
+ },
+ {
+ "epoch": 1.7396520695860826,
+ "grad_norm": 0.7360944151878357,
+ "learning_rate": 9.335185262586699e-07,
+ "loss": 82.2135,
+ "step": 2537
+ },
+ {
+ "epoch": 1.740337646756363,
+ "grad_norm": 0.7279215455055237,
+ "learning_rate": 9.173787697077107e-07,
+ "loss": 82.199,
+ "step": 2538
+ },
+ {
+ "epoch": 1.7410232239266432,
+ "grad_norm": 0.6556176543235779,
+ "learning_rate": 9.013791110340175e-07,
+ "loss": 82.2059,
+ "step": 2539
+ },
+ {
+ "epoch": 1.7417088010969235,
+ "grad_norm": 0.6386843919754028,
+ "learning_rate": 8.855195728602939e-07,
+ "loss": 82.1856,
+ "step": 2540
+ },
+ {
+ "epoch": 1.7423943782672038,
+ "grad_norm": 0.6329452991485596,
+ "learning_rate": 8.698001776111575e-07,
+ "loss": 82.2459,
+ "step": 2541
+ },
+ {
+ "epoch": 1.743079955437484,
+ "grad_norm": 0.6525189876556396,
+ "learning_rate": 8.542209475130292e-07,
+ "loss": 82.1942,
+ "step": 2542
+ },
+ {
+ "epoch": 1.7437655326077641,
+ "grad_norm": 0.5788668990135193,
+ "learning_rate": 8.387819045941769e-07,
+ "loss": 82.183,
+ "step": 2543
+ },
+ {
+ "epoch": 1.7444511097780444,
+ "grad_norm": 0.7355834245681763,
+ "learning_rate": 8.234830706846164e-07,
+ "loss": 82.2277,
+ "step": 2544
+ },
+ {
+ "epoch": 1.7451366869483245,
+ "grad_norm": 0.6216195225715637,
+ "learning_rate": 8.08324467416155e-07,
+ "loss": 82.2398,
+ "step": 2545
+ },
+ {
+ "epoch": 1.7458222641186047,
+ "grad_norm": 0.6454034447669983,
+ "learning_rate": 7.933061162222921e-07,
+ "loss": 82.2615,
+ "step": 2546
+ },
+ {
+ "epoch": 1.746507841288885,
+ "grad_norm": 0.533258855342865,
+ "learning_rate": 7.784280383382192e-07,
+ "loss": 82.2377,
+ "step": 2547
+ },
+ {
+ "epoch": 1.7471934184591653,
+ "grad_norm": 0.644951343536377,
+ "learning_rate": 7.636902548008085e-07,
+ "loss": 82.174,
+ "step": 2548
+ },
+ {
+ "epoch": 1.7478789956294456,
+ "grad_norm": 0.7034481763839722,
+ "learning_rate": 7.490927864485464e-07,
+ "loss": 82.1811,
+ "step": 2549
+ },
+ {
+ "epoch": 1.7485645727997259,
+ "grad_norm": 0.7296063899993896,
+ "learning_rate": 7.346356539215116e-07,
+ "loss": 82.2469,
+ "step": 2550
+ },
+ {
+ "epoch": 1.7492501499700062,
+ "grad_norm": 0.701720118522644,
+ "learning_rate": 7.203188776613745e-07,
+ "loss": 82.1859,
+ "step": 2551
+ },
+ {
+ "epoch": 1.7499357271402862,
+ "grad_norm": 0.6307011842727661,
+ "learning_rate": 7.061424779113424e-07,
+ "loss": 82.2474,
+ "step": 2552
+ },
+ {
+ "epoch": 1.7506213043105665,
+ "grad_norm": 0.6798613667488098,
+ "learning_rate": 6.921064747161476e-07,
+ "loss": 82.1944,
+ "step": 2553
+ },
+ {
+ "epoch": 1.7513068814808466,
+ "grad_norm": 0.6421661972999573,
+ "learning_rate": 6.782108879219817e-07,
+ "loss": 82.2346,
+ "step": 2554
+ },
+ {
+ "epoch": 1.7519924586511268,
+ "grad_norm": 0.7070729732513428,
+ "learning_rate": 6.64455737176517e-07,
+ "loss": 82.2537,
+ "step": 2555
+ },
+ {
+ "epoch": 1.7526780358214071,
+ "grad_norm": 0.60176682472229,
+ "learning_rate": 6.508410419288513e-07,
+ "loss": 82.1744,
+ "step": 2556
+ },
+ {
+ "epoch": 1.7533636129916874,
+ "grad_norm": 0.7718641757965088,
+ "learning_rate": 6.373668214294859e-07,
+ "loss": 82.2105,
+ "step": 2557
+ },
+ {
+ "epoch": 1.7540491901619677,
+ "grad_norm": 0.8797594308853149,
+ "learning_rate": 6.240330947302808e-07,
+ "loss": 82.2068,
+ "step": 2558
+ },
+ {
+ "epoch": 1.754734767332248,
+ "grad_norm": 0.7588513493537903,
+ "learning_rate": 6.108398806844662e-07,
+ "loss": 82.2615,
+ "step": 2559
+ },
+ {
+ "epoch": 1.755420344502528,
+ "grad_norm": 0.6733604073524475,
+ "learning_rate": 5.977871979465977e-07,
+ "loss": 82.2358,
+ "step": 2560
+ },
+ {
+ "epoch": 1.7561059216728083,
+ "grad_norm": 0.5898498892784119,
+ "learning_rate": 5.848750649725121e-07,
+ "loss": 82.1374,
+ "step": 2561
+ },
+ {
+ "epoch": 1.7567914988430884,
+ "grad_norm": 0.7103486657142639,
+ "learning_rate": 5.721035000193165e-07,
+ "loss": 82.2805,
+ "step": 2562
+ },
+ {
+ "epoch": 1.7574770760133687,
+ "grad_norm": 0.8472493886947632,
+ "learning_rate": 5.594725211453655e-07,
+ "loss": 82.1691,
+ "step": 2563
+ },
+ {
+ "epoch": 1.758162653183649,
+ "grad_norm": 0.6238364577293396,
+ "learning_rate": 5.469821462102398e-07,
+ "loss": 82.1667,
+ "step": 2564
+ },
+ {
+ "epoch": 1.7588482303539292,
+ "grad_norm": 0.6626426577568054,
+ "learning_rate": 5.34632392874701e-07,
+ "loss": 82.2053,
+ "step": 2565
+ },
+ {
+ "epoch": 1.7595338075242095,
+ "grad_norm": 0.76328045129776,
+ "learning_rate": 5.224232786006811e-07,
+ "loss": 82.1946,
+ "step": 2566
+ },
+ {
+ "epoch": 1.7602193846944898,
+ "grad_norm": 0.695488691329956,
+ "learning_rate": 5.103548206512487e-07,
+ "loss": 82.1913,
+ "step": 2567
+ },
+ {
+ "epoch": 1.7609049618647699,
+ "grad_norm": 0.6686795353889465,
+ "learning_rate": 4.984270360906207e-07,
+ "loss": 82.2561,
+ "step": 2568
+ },
+ {
+ "epoch": 1.7615905390350501,
+ "grad_norm": 0.6828111410140991,
+ "learning_rate": 4.866399417840839e-07,
+ "loss": 82.2495,
+ "step": 2569
+ },
+ {
+ "epoch": 1.7622761162053302,
+ "grad_norm": 0.6867351531982422,
+ "learning_rate": 4.7499355439798443e-07,
+ "loss": 82.2579,
+ "step": 2570
+ },
+ {
+ "epoch": 1.7629616933756105,
+ "grad_norm": 0.8694186806678772,
+ "learning_rate": 4.6348789039974973e-07,
+ "loss": 82.2177,
+ "step": 2571
+ },
+ {
+ "epoch": 1.7636472705458908,
+ "grad_norm": 0.823703408241272,
+ "learning_rate": 4.521229660578108e-07,
+ "loss": 82.1858,
+ "step": 2572
+ },
+ {
+ "epoch": 1.764332847716171,
+ "grad_norm": 0.6795697212219238,
+ "learning_rate": 4.4089879744160234e-07,
+ "loss": 82.2448,
+ "step": 2573
+ },
+ {
+ "epoch": 1.7650184248864513,
+ "grad_norm": 0.6945237517356873,
+ "learning_rate": 4.2981540042152934e-07,
+ "loss": 82.2162,
+ "step": 2574
+ },
+ {
+ "epoch": 1.7657040020567316,
+ "grad_norm": 0.7060695290565491,
+ "learning_rate": 4.188727906689782e-07,
+ "loss": 82.2068,
+ "step": 2575
+ },
+ {
+ "epoch": 1.766389579227012,
+ "grad_norm": 0.7274417877197266,
+ "learning_rate": 4.080709836562391e-07,
+ "loss": 82.2307,
+ "step": 2576
+ },
+ {
+ "epoch": 1.767075156397292,
+ "grad_norm": 0.6976255774497986,
+ "learning_rate": 3.9740999465653904e-07,
+ "loss": 82.1747,
+ "step": 2577
+ },
+ {
+ "epoch": 1.7677607335675722,
+ "grad_norm": 0.8024837374687195,
+ "learning_rate": 3.8688983874396454e-07,
+ "loss": 82.2583,
+ "step": 2578
+ },
+ {
+ "epoch": 1.7684463107378523,
+ "grad_norm": 0.825741171836853,
+ "learning_rate": 3.7651053079350575e-07,
+ "loss": 82.2569,
+ "step": 2579
+ },
+ {
+ "epoch": 1.7691318879081326,
+ "grad_norm": 0.8342203497886658,
+ "learning_rate": 3.6627208548097866e-07,
+ "loss": 82.1863,
+ "step": 2580
+ },
+ {
+ "epoch": 1.7698174650784129,
+ "grad_norm": 0.7146753668785095,
+ "learning_rate": 3.561745172830477e-07,
+ "loss": 82.2755,
+ "step": 2581
+ },
+ {
+ "epoch": 1.7705030422486931,
+ "grad_norm": 0.6945064067840576,
+ "learning_rate": 3.462178404771477e-07,
+ "loss": 82.2087,
+ "step": 2582
+ },
+ {
+ "epoch": 1.7711886194189734,
+ "grad_norm": 0.7338677048683167,
+ "learning_rate": 3.3640206914153927e-07,
+ "loss": 82.2696,
+ "step": 2583
+ },
+ {
+ "epoch": 1.7718741965892537,
+ "grad_norm": 0.6690239906311035,
+ "learning_rate": 3.267272171552316e-07,
+ "loss": 82.2093,
+ "step": 2584
+ },
+ {
+ "epoch": 1.7725597737595338,
+ "grad_norm": 0.9962079524993896,
+ "learning_rate": 3.171932981979708e-07,
+ "loss": 82.2489,
+ "step": 2585
+ },
+ {
+ "epoch": 1.773245350929814,
+ "grad_norm": 0.7202107310295105,
+ "learning_rate": 3.0780032575025155e-07,
+ "loss": 82.1385,
+ "step": 2586
+ },
+ {
+ "epoch": 1.7739309281000941,
+ "grad_norm": 0.7426759600639343,
+ "learning_rate": 2.9854831309327204e-07,
+ "loss": 82.2194,
+ "step": 2587
+ },
+ {
+ "epoch": 1.7746165052703744,
+ "grad_norm": 0.5640959143638611,
+ "learning_rate": 2.8943727330890125e-07,
+ "loss": 82.2244,
+ "step": 2588
+ },
+ {
+ "epoch": 1.7753020824406547,
+ "grad_norm": 0.7230386734008789,
+ "learning_rate": 2.80467219279712e-07,
+ "loss": 82.1554,
+ "step": 2589
+ },
+ {
+ "epoch": 1.775987659610935,
+ "grad_norm": 0.6880918145179749,
+ "learning_rate": 2.7163816368890314e-07,
+ "loss": 82.2226,
+ "step": 2590
+ },
+ {
+ "epoch": 1.7766732367812152,
+ "grad_norm": 0.8692451119422913,
+ "learning_rate": 2.6295011902031097e-07,
+ "loss": 82.2665,
+ "step": 2591
+ },
+ {
+ "epoch": 1.7773588139514955,
+ "grad_norm": 0.7101627588272095,
+ "learning_rate": 2.5440309755839775e-07,
+ "loss": 82.2309,
+ "step": 2592
+ },
+ {
+ "epoch": 1.7780443911217756,
+ "grad_norm": 0.7459553480148315,
+ "learning_rate": 2.459971113882409e-07,
+ "loss": 82.2684,
+ "step": 2593
+ },
+ {
+ "epoch": 1.7787299682920559,
+ "grad_norm": 0.6015447974205017,
+ "learning_rate": 2.377321723954773e-07,
+ "loss": 82.2359,
+ "step": 2594
+ },
+ {
+ "epoch": 1.7794155454623362,
+ "grad_norm": 0.825840175151825,
+ "learning_rate": 2.2960829226631452e-07,
+ "loss": 82.2056,
+ "step": 2595
+ },
+ {
+ "epoch": 1.7801011226326162,
+ "grad_norm": 0.7459067106246948,
+ "learning_rate": 2.216254824875197e-07,
+ "loss": 82.218,
+ "step": 2596
+ },
+ {
+ "epoch": 1.7807866998028965,
+ "grad_norm": 0.6501146554946899,
+ "learning_rate": 2.13783754346375e-07,
+ "loss": 82.1983,
+ "step": 2597
+ },
+ {
+ "epoch": 1.7814722769731768,
+ "grad_norm": 0.6280929446220398,
+ "learning_rate": 2.060831189307e-07,
+ "loss": 82.2458,
+ "step": 2598
+ },
+ {
+ "epoch": 1.782157854143457,
+ "grad_norm": 0.7218676209449768,
+ "learning_rate": 1.9852358712880713e-07,
+ "loss": 82.2687,
+ "step": 2599
+ },
+ {
+ "epoch": 1.7828434313137373,
+ "grad_norm": 0.6138923168182373,
+ "learning_rate": 1.9110516962950186e-07,
+ "loss": 82.2733,
+ "step": 2600
+ },
+ {
+ "epoch": 1.7828434313137373,
+ "eval_loss": 10.27136516571045,
+ "eval_runtime": 5.7783,
+ "eval_samples_per_second": 336.607,
+ "eval_steps_per_second": 84.282,
+ "step": 2600
  }
  ],
  "logging_steps": 1,
@@ -17743,7 +18451,7 @@
  "attributes": {}
  }
  },
- "total_flos":
+ "total_flos": 1074706513920000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
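trainer_state.json is the transformers Trainer's bookkeeping file: this commit appends one log entry per optimizer step (2501-2600) plus one evaluation record, and updates best_metric, best_model_checkpoint, epoch, and global_step in the header. A minimal sketch of reading the headline numbers back from a downloaded checkpoint, assuming the standard Trainer layout in which the per-step records live under "log_history":

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 10.27136516571045
print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-2600
print(state["global_step"])            # 2600

# Evaluation records are the log_history entries carrying "eval_loss".
last_eval = [e for e in state["log_history"] if "eval_loss" in e][-1]
print(last_eval["eval_loss"], last_eval["step"])  # 10.27136516571045 2600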