Training in progress, step 840, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:27db5c66916d7daad771886e225cd7152669ee12e611a380fbf0009c9af37adc
 size 289512208
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3577d633018a2488773a311af50b3f09dfc5134434176462a126cedd7dcc57c2
 size 147781972
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:30a9264446d9bfcf977beea433026295798ed92bc03fae79d89f70494644af49
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:699b3777e1ea7a60123ef22ecc366f524146f7231f57273c73780dc41dc98d5c
 size 1064
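Each of the pointer files above follows the Git LFS pointer format (version, oid sha256, size). As a minimal sketch, assuming the checkpoint files have been pulled locally under last-checkpoint/, the following Python snippet recomputes a file's SHA-256 and compares it with the oid recorded in the pointer diff above; the local path and the choice of adapter_model.safetensors are illustrative assumptions, not part of this commit.

# Minimal sketch (not part of this commit): verify a downloaded checkpoint
# file against the sha256 oid from its Git LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in chunks so large checkpoints do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the adapter_model.safetensors pointer diff above
expected = "27db5c66916d7daad771886e225cd7152669ee12e611a380fbf0009c9af37adc"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else f"mismatch: {actual}")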
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.203278660774231,
   "best_model_checkpoint": "miner_id_24/checkpoint-800",
-  "epoch": 0.
+  "epoch": 0.05339477970680545,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 840,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5679,6 +5679,286 @@
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 800
+    },
+    {
+      "epoch": 0.0509157363632752,
+      "grad_norm": 0.2525332570075989,
+      "learning_rate": 1.0875619410158466e-06,
+      "loss": 1.1738,
+      "step": 801
+    },
+    {
+      "epoch": 0.05097930157721187,
+      "grad_norm": 0.24210986495018005,
+      "learning_rate": 1.0325995198509409e-06,
+      "loss": 1.1435,
+      "step": 802
+    },
+    {
+      "epoch": 0.05104286679114854,
+      "grad_norm": 0.25146523118019104,
+      "learning_rate": 9.79054964740911e-07,
+      "loss": 1.2707,
+      "step": 803
+    },
+    {
+      "epoch": 0.05110643200508522,
+      "grad_norm": 0.2529788315296173,
+      "learning_rate": 9.269290427969868e-07,
+      "loss": 1.1679,
+      "step": 804
+    },
+    {
+      "epoch": 0.05116999721902189,
+      "grad_norm": 0.25673815608024597,
+      "learning_rate": 8.762225008062674e-07,
+      "loss": 1.2141,
+      "step": 805
+    },
+    {
+      "epoch": 0.05123356243295856,
+      "grad_norm": 0.25803902745246887,
+      "learning_rate": 8.26936065220929e-07,
+      "loss": 1.2018,
+      "step": 806
+    },
+    {
+      "epoch": 0.05129712764689524,
+      "grad_norm": 0.2565945088863373,
+      "learning_rate": 7.790704421478557e-07,
+      "loss": 1.2261,
+      "step": 807
+    },
+    {
+      "epoch": 0.05136069286083191,
+      "grad_norm": 0.2625206410884857,
+      "learning_rate": 7.326263173385584e-07,
+      "loss": 1.1934,
+      "step": 808
+    },
+    {
+      "epoch": 0.05142425807476858,
+      "grad_norm": 0.25721174478530884,
+      "learning_rate": 6.876043561792833e-07,
+      "loss": 1.2349,
+      "step": 809
+    },
+    {
+      "epoch": 0.051487823288705256,
+      "grad_norm": 0.25995710492134094,
+      "learning_rate": 6.440052036815081e-07,
+      "loss": 1.3027,
+      "step": 810
+    },
+    {
+      "epoch": 0.05155138850264193,
+      "grad_norm": 0.25100308656692505,
+      "learning_rate": 6.018294844727379e-07,
+      "loss": 1.1802,
+      "step": 811
+    },
+    {
+      "epoch": 0.0516149537165786,
+      "grad_norm": 0.2459433674812317,
+      "learning_rate": 5.610778027874908e-07,
+      "loss": 1.1474,
+      "step": 812
+    },
+    {
+      "epoch": 0.051678518930515276,
+      "grad_norm": 0.2367779165506363,
+      "learning_rate": 5.217507424586821e-07,
+      "loss": 1.168,
+      "step": 813
+    },
+    {
+      "epoch": 0.05174208414445195,
+      "grad_norm": 0.2512117922306061,
+      "learning_rate": 4.838488669092534e-07,
+      "loss": 1.091,
+      "step": 814
+    },
+    {
+      "epoch": 0.05180564935838862,
+      "grad_norm": 0.2595987319946289,
+      "learning_rate": 4.4737271914411236e-07,
+      "loss": 1.1756,
+      "step": 815
+    },
+    {
+      "epoch": 0.051869214572325295,
+      "grad_norm": 0.26023730635643005,
+      "learning_rate": 4.123228217422948e-07,
+      "loss": 1.068,
+      "step": 816
+    },
+    {
+      "epoch": 0.05193277978626197,
+      "grad_norm": 0.26552048325538635,
+      "learning_rate": 3.7869967684958094e-07,
+      "loss": 1.1605,
+      "step": 817
+    },
+    {
+      "epoch": 0.05199634500019864,
+      "grad_norm": 0.24736690521240234,
+      "learning_rate": 3.465037661712134e-07,
+      "loss": 1.2006,
+      "step": 818
+    },
+    {
+      "epoch": 0.052059910214135315,
+      "grad_norm": 0.26172155141830444,
+      "learning_rate": 3.1573555096501283e-07,
+      "loss": 1.2359,
+      "step": 819
+    },
+    {
+      "epoch": 0.05212347542807199,
+      "grad_norm": 0.25399184226989746,
+      "learning_rate": 2.86395472034795e-07,
+      "loss": 1.2153,
+      "step": 820
+    },
+    {
+      "epoch": 0.05218704064200866,
+      "grad_norm": 0.25162798166275024,
+      "learning_rate": 2.584839497240643e-07,
+      "loss": 1.2581,
+      "step": 821
+    },
+    {
+      "epoch": 0.052250605855945334,
+      "grad_norm": 0.2551822066307068,
+      "learning_rate": 2.3200138390993e-07,
+      "loss": 1.1388,
+      "step": 822
+    },
+    {
+      "epoch": 0.05231417106988201,
+      "grad_norm": 0.24114681780338287,
+      "learning_rate": 2.0694815399744382e-07,
+      "loss": 1.2377,
+      "step": 823
+    },
+    {
+      "epoch": 0.05237773628381868,
+      "grad_norm": 0.26416000723838806,
+      "learning_rate": 1.83324618914138e-07,
+      "loss": 1.2193,
+      "step": 824
+    },
+    {
+      "epoch": 0.052441301497755353,
+      "grad_norm": 0.25959083437919617,
+      "learning_rate": 1.611311171048735e-07,
+      "loss": 1.1987,
+      "step": 825
+    },
+    {
+      "epoch": 0.05250486671169203,
+      "grad_norm": 0.24999088048934937,
+      "learning_rate": 1.4036796652701078e-07,
+      "loss": 1.1644,
+      "step": 826
+    },
+    {
+      "epoch": 0.0525684319256287,
+      "grad_norm": 0.25357383489608765,
+      "learning_rate": 1.210354646458245e-07,
+      "loss": 1.2345,
+      "step": 827
+    },
+    {
+      "epoch": 0.05263199713956537,
+      "grad_norm": 0.25583428144454956,
+      "learning_rate": 1.031338884302846e-07,
+      "loss": 1.2685,
+      "step": 828
+    },
+    {
+      "epoch": 0.05269556235350205,
+      "grad_norm": 0.25566795468330383,
+      "learning_rate": 8.666349434907073e-08,
+      "loss": 1.2141,
+      "step": 829
+    },
+    {
+      "epoch": 0.05275912756743872,
+      "grad_norm": 0.2603313624858856,
+      "learning_rate": 7.162451836685291e-08,
+      "loss": 1.2535,
+      "step": 830
+    },
+    {
+      "epoch": 0.05282269278137539,
+      "grad_norm": 0.24881498515605927,
+      "learning_rate": 5.8017175941005306e-08,
+      "loss": 1.1596,
+      "step": 831
+    },
+    {
+      "epoch": 0.05288625799531207,
+      "grad_norm": 0.2581416070461273,
+      "learning_rate": 4.584166201841988e-08,
+      "loss": 1.2291,
+      "step": 832
+    },
+    {
+      "epoch": 0.052949823209248736,
+      "grad_norm": 0.2521674335002899,
+      "learning_rate": 3.5098151032786355e-08,
+      "loss": 1.2752,
+      "step": 833
+    },
+    {
+      "epoch": 0.05301338842318541,
+      "grad_norm": 0.2460847645998001,
+      "learning_rate": 2.578679690204977e-08,
+      "loss": 1.1633,
+      "step": 834
+    },
+    {
+      "epoch": 0.05307695363712209,
+      "grad_norm": 0.2515714764595032,
+      "learning_rate": 1.7907733026223394e-08,
+      "loss": 1.1517,
+      "step": 835
+    },
+    {
+      "epoch": 0.053140518851058756,
+      "grad_norm": 0.2554892301559448,
+      "learning_rate": 1.1461072285490204e-08,
+      "loss": 1.1205,
+      "step": 836
+    },
+    {
+      "epoch": 0.05320408406499543,
+      "grad_norm": 0.2557508945465088,
+      "learning_rate": 6.446907038559769e-09,
+      "loss": 1.1845,
+      "step": 837
+    },
+    {
+      "epoch": 0.05326764927893211,
+      "grad_norm": 0.25483280420303345,
+      "learning_rate": 2.865309121358184e-09,
+      "loss": 1.1348,
+      "step": 838
+    },
+    {
+      "epoch": 0.053331214492868775,
+      "grad_norm": 0.2680445909500122,
+      "learning_rate": 7.163298459844647e-10,
+      "loss": 1.1985,
+      "step": 839
+    },
+    {
+      "epoch": 0.05339477970680545,
+      "grad_norm": 0.26120489835739136,
+      "learning_rate": 0.0,
+      "loss": 1.264,
+      "step": 840
     }
   ],
   "logging_steps": 1,
@@ -5702,12 +5982,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.3648292071931904e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
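For reference, a minimal sketch (not part of this commit) that reads the updated trainer_state.json and summarizes where the run ended; it assumes the checkpoint directory has been downloaded locally, and the keys used are exactly those visible in the diff above.

# Minimal sketch: inspect the trainer state written at step 840.
import json

with open("last-checkpoint/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

print("global_step:", state["global_step"])                      # 840 in this commit
print("epoch:", state["epoch"])                                  # ~0.0534
print("best_metric:", state["best_metric"])                      # 1.2033, recorded at step 800
print("best_model_checkpoint:", state["best_model_checkpoint"])  # miner_id_24/checkpoint-800
print("last logged loss:", state["log_history"][-1]["loss"])     # 1.264 at step 840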