romainnn commited on
Commit
6736489
·
verified ·
1 Parent(s): bd8794e

Training in progress, step 2600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3e64bed518d5a3a197c2fac91e03cb6fce0aa6dd03765c01b4f86cb957b7e2
3
  size 58680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60a8247fd6ba1f9fe0dbc2ab7765133457470ca6d3a44c71bf94f3d4c10f9e4b
3
  size 58680
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f2063981b6f95fd3274e8a3383acc470bca3c10986a8bbd142dd976c3789e6f
3
  size 127270
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba646ce7e6e999342a8ea23619ed724cecef86f809b7b64b8e29bc9e76ab814c
3
  size 127270
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a1f72280ad98da200b904c28d5ec70a04f6dfa5f81c635d879f903cadb9d61a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da464f1ab8886439566680dd97216fa326d723519bedac0470d5e0944caeac13
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cc12aecfae3056d8ec4a0f344b39564cd733f2ff9421f9ee526ed5951836aac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97ae609b712bd37f007cb4326617d16b954d56d87dcc6a35ec5b0ba67ade88b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 10.271383285522461,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-2500",
4
- "epoch": 1.7142857142857144,
5
  "eval_steps": 100,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17715,6 +17715,714 @@
17715
  "eval_samples_per_second": 335.868,
17716
  "eval_steps_per_second": 84.096,
17717
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17718
  }
17719
  ],
17720
  "logging_steps": 1,
@@ -17743,7 +18451,7 @@
17743
  "attributes": {}
17744
  }
17745
  },
17746
- "total_flos": 1033371648000000.0,
17747
  "train_batch_size": 4,
17748
  "trial_name": null,
17749
  "trial_params": null
 
1
  {
2
+ "best_metric": 10.27136516571045,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-2600",
4
+ "epoch": 1.7828434313137373,
5
  "eval_steps": 100,
6
+ "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17715
  "eval_samples_per_second": 335.868,
17716
  "eval_steps_per_second": 84.096,
17717
  "step": 2500
17718
+ },
17719
+ {
17720
+ "epoch": 1.7149712914559945,
17721
+ "grad_norm": 0.6548197865486145,
17722
+ "learning_rate": 1.607647829374337e-06,
17723
+ "loss": 82.2205,
17724
+ "step": 2501
17725
+ },
17726
+ {
17727
+ "epoch": 1.7156568686262748,
17728
+ "grad_norm": 0.6493549942970276,
17729
+ "learning_rate": 1.5864812845490574e-06,
17730
+ "loss": 82.2224,
17731
+ "step": 2502
17732
+ },
17733
+ {
17734
+ "epoch": 1.7163424457965548,
17735
+ "grad_norm": 0.6554645895957947,
17736
+ "learning_rate": 1.5654538915235051e-06,
17737
+ "loss": 82.2281,
17738
+ "step": 2503
17739
+ },
17740
+ {
17741
+ "epoch": 1.717028022966835,
17742
+ "grad_norm": 0.7031847834587097,
17743
+ "learning_rate": 1.5445656800293751e-06,
17744
+ "loss": 82.2184,
17745
+ "step": 2504
17746
+ },
17747
+ {
17748
+ "epoch": 1.7177136001371154,
17749
+ "grad_norm": 0.6444084048271179,
17750
+ "learning_rate": 1.5238166796015308e-06,
17751
+ "loss": 82.2735,
17752
+ "step": 2505
17753
+ },
17754
+ {
17755
+ "epoch": 1.7183991773073957,
17756
+ "grad_norm": 0.643793523311615,
17757
+ "learning_rate": 1.5032069195780374e-06,
17758
+ "loss": 82.1628,
17759
+ "step": 2506
17760
+ },
17761
+ {
17762
+ "epoch": 1.719084754477676,
17763
+ "grad_norm": 0.8285008668899536,
17764
+ "learning_rate": 1.4827364291000735e-06,
17765
+ "loss": 82.1914,
17766
+ "step": 2507
17767
+ },
17768
+ {
17769
+ "epoch": 1.7197703316479562,
17770
+ "grad_norm": 0.7288245558738708,
17771
+ "learning_rate": 1.4624052371118634e-06,
17772
+ "loss": 82.2586,
17773
+ "step": 2508
17774
+ },
17775
+ {
17776
+ "epoch": 1.7204559088182365,
17777
+ "grad_norm": 0.753436803817749,
17778
+ "learning_rate": 1.4422133723607233e-06,
17779
+ "loss": 82.2104,
17780
+ "step": 2509
17781
+ },
17782
+ {
17783
+ "epoch": 1.7211414859885166,
17784
+ "grad_norm": 0.7888475060462952,
17785
+ "learning_rate": 1.4221608633969263e-06,
17786
+ "loss": 82.2759,
17787
+ "step": 2510
17788
+ },
17789
+ {
17790
+ "epoch": 1.7218270631587969,
17791
+ "grad_norm": 0.8458419442176819,
17792
+ "learning_rate": 1.4022477385737365e-06,
17793
+ "loss": 82.269,
17794
+ "step": 2511
17795
+ },
17796
+ {
17797
+ "epoch": 1.722512640329077,
17798
+ "grad_norm": 0.7998586297035217,
17799
+ "learning_rate": 1.382474026047298e-06,
17800
+ "loss": 82.2513,
17801
+ "step": 2512
17802
+ },
17803
+ {
17804
+ "epoch": 1.7231982174993572,
17805
+ "grad_norm": 0.6696304678916931,
17806
+ "learning_rate": 1.3628397537766568e-06,
17807
+ "loss": 82.1494,
17808
+ "step": 2513
17809
+ },
17810
+ {
17811
+ "epoch": 1.7238837946696375,
17812
+ "grad_norm": 0.6818397045135498,
17813
+ "learning_rate": 1.343344949523706e-06,
17814
+ "loss": 82.2001,
17815
+ "step": 2514
17816
+ },
17817
+ {
17818
+ "epoch": 1.7245693718399178,
17819
+ "grad_norm": 0.6361696124076843,
17820
+ "learning_rate": 1.323989640853107e-06,
17821
+ "loss": 82.2448,
17822
+ "step": 2515
17823
+ },
17824
+ {
17825
+ "epoch": 1.725254949010198,
17826
+ "grad_norm": 0.6482613682746887,
17827
+ "learning_rate": 1.304773855132313e-06,
17828
+ "loss": 82.2519,
17829
+ "step": 2516
17830
+ },
17831
+ {
17832
+ "epoch": 1.7259405261804783,
17833
+ "grad_norm": 0.7171697616577148,
17834
+ "learning_rate": 1.2856976195314784e-06,
17835
+ "loss": 82.1872,
17836
+ "step": 2517
17837
+ },
17838
+ {
17839
+ "epoch": 1.7266261033507584,
17840
+ "grad_norm": 0.6757230758666992,
17841
+ "learning_rate": 1.26676096102345e-06,
17842
+ "loss": 82.2956,
17843
+ "step": 2518
17844
+ },
17845
+ {
17846
+ "epoch": 1.7273116805210387,
17847
+ "grad_norm": 0.6487752795219421,
17848
+ "learning_rate": 1.24796390638372e-06,
17849
+ "loss": 82.2076,
17850
+ "step": 2519
17851
+ },
17852
+ {
17853
+ "epoch": 1.7279972576913187,
17854
+ "grad_norm": 0.7632609009742737,
17855
+ "learning_rate": 1.2293064821903845e-06,
17856
+ "loss": 82.1435,
17857
+ "step": 2520
17858
+ },
17859
+ {
17860
+ "epoch": 1.728682834861599,
17861
+ "grad_norm": 0.614216148853302,
17862
+ "learning_rate": 1.210788714824096e-06,
17863
+ "loss": 82.2062,
17864
+ "step": 2521
17865
+ },
17866
+ {
17867
+ "epoch": 1.7293684120318793,
17868
+ "grad_norm": 0.837080717086792,
17869
+ "learning_rate": 1.192410630468066e-06,
17870
+ "loss": 82.2647,
17871
+ "step": 2522
17872
+ },
17873
+ {
17874
+ "epoch": 1.7300539892021596,
17875
+ "grad_norm": 0.6004711985588074,
17876
+ "learning_rate": 1.1741722551079859e-06,
17877
+ "loss": 82.2275,
17878
+ "step": 2523
17879
+ },
17880
+ {
17881
+ "epoch": 1.7307395663724399,
17882
+ "grad_norm": 0.692437469959259,
17883
+ "learning_rate": 1.1560736145320161e-06,
17884
+ "loss": 82.1699,
17885
+ "step": 2524
17886
+ },
17887
+ {
17888
+ "epoch": 1.7314251435427201,
17889
+ "grad_norm": 0.9786331057548523,
17890
+ "learning_rate": 1.1381147343307086e-06,
17891
+ "loss": 82.1797,
17892
+ "step": 2525
17893
+ },
17894
+ {
17895
+ "epoch": 1.7321107207130002,
17896
+ "grad_norm": 0.7589514851570129,
17897
+ "learning_rate": 1.1202956398970287e-06,
17898
+ "loss": 82.2553,
17899
+ "step": 2526
17900
+ },
17901
+ {
17902
+ "epoch": 1.7327962978832805,
17903
+ "grad_norm": 0.7883815765380859,
17904
+ "learning_rate": 1.1026163564263003e-06,
17905
+ "loss": 82.2521,
17906
+ "step": 2527
17907
+ },
17908
+ {
17909
+ "epoch": 1.7334818750535606,
17910
+ "grad_norm": 0.6983951330184937,
17911
+ "learning_rate": 1.0850769089161384e-06,
17912
+ "loss": 82.2064,
17913
+ "step": 2528
17914
+ },
17915
+ {
17916
+ "epoch": 1.7341674522238408,
17917
+ "grad_norm": 0.8901423215866089,
17918
+ "learning_rate": 1.0676773221664382e-06,
17919
+ "loss": 82.2177,
17920
+ "step": 2529
17921
+ },
17922
+ {
17923
+ "epoch": 1.7348530293941211,
17924
+ "grad_norm": 0.6684783697128296,
17925
+ "learning_rate": 1.0504176207793649e-06,
17926
+ "loss": 82.2156,
17927
+ "step": 2530
17928
+ },
17929
+ {
17930
+ "epoch": 1.7355386065644014,
17931
+ "grad_norm": 0.7410807013511658,
17932
+ "learning_rate": 1.0332978291592631e-06,
17933
+ "loss": 82.1716,
17934
+ "step": 2531
17935
+ },
17936
+ {
17937
+ "epoch": 1.7362241837346817,
17938
+ "grad_norm": 0.764433741569519,
17939
+ "learning_rate": 1.0163179715126593e-06,
17940
+ "loss": 82.1475,
17941
+ "step": 2532
17942
+ },
17943
+ {
17944
+ "epoch": 1.736909760904962,
17945
+ "grad_norm": 0.7061108946800232,
17946
+ "learning_rate": 9.994780718482367e-07,
17947
+ "loss": 82.2915,
17948
+ "step": 2533
17949
+ },
17950
+ {
17951
+ "epoch": 1.7375953380752422,
17952
+ "grad_norm": 0.8760568499565125,
17953
+ "learning_rate": 9.827781539767488e-07,
17954
+ "loss": 82.2156,
17955
+ "step": 2534
17956
+ },
17957
+ {
17958
+ "epoch": 1.7382809152455223,
17959
+ "grad_norm": 0.6002421975135803,
17960
+ "learning_rate": 9.662182415110632e-07,
17961
+ "loss": 82.1607,
17962
+ "step": 2535
17963
+ },
17964
+ {
17965
+ "epoch": 1.7389664924158026,
17966
+ "grad_norm": 0.6147943735122681,
17967
+ "learning_rate": 9.49798357866083e-07,
17968
+ "loss": 82.2697,
17969
+ "step": 2536
17970
+ },
17971
+ {
17972
+ "epoch": 1.7396520695860826,
17973
+ "grad_norm": 0.7360944151878357,
17974
+ "learning_rate": 9.335185262586699e-07,
17975
+ "loss": 82.2135,
17976
+ "step": 2537
17977
+ },
17978
+ {
17979
+ "epoch": 1.740337646756363,
17980
+ "grad_norm": 0.7279215455055237,
17981
+ "learning_rate": 9.173787697077107e-07,
17982
+ "loss": 82.199,
17983
+ "step": 2538
17984
+ },
17985
+ {
17986
+ "epoch": 1.7410232239266432,
17987
+ "grad_norm": 0.6556176543235779,
17988
+ "learning_rate": 9.013791110340175e-07,
17989
+ "loss": 82.2059,
17990
+ "step": 2539
17991
+ },
17992
+ {
17993
+ "epoch": 1.7417088010969235,
17994
+ "grad_norm": 0.6386843919754028,
17995
+ "learning_rate": 8.855195728602939e-07,
17996
+ "loss": 82.1856,
17997
+ "step": 2540
17998
+ },
17999
+ {
18000
+ "epoch": 1.7423943782672038,
18001
+ "grad_norm": 0.6329452991485596,
18002
+ "learning_rate": 8.698001776111575e-07,
18003
+ "loss": 82.2459,
18004
+ "step": 2541
18005
+ },
18006
+ {
18007
+ "epoch": 1.743079955437484,
18008
+ "grad_norm": 0.6525189876556396,
18009
+ "learning_rate": 8.542209475130292e-07,
18010
+ "loss": 82.1942,
18011
+ "step": 2542
18012
+ },
18013
+ {
18014
+ "epoch": 1.7437655326077641,
18015
+ "grad_norm": 0.5788668990135193,
18016
+ "learning_rate": 8.387819045941769e-07,
18017
+ "loss": 82.183,
18018
+ "step": 2543
18019
+ },
18020
+ {
18021
+ "epoch": 1.7444511097780444,
18022
+ "grad_norm": 0.7355834245681763,
18023
+ "learning_rate": 8.234830706846164e-07,
18024
+ "loss": 82.2277,
18025
+ "step": 2544
18026
+ },
18027
+ {
18028
+ "epoch": 1.7451366869483245,
18029
+ "grad_norm": 0.6216195225715637,
18030
+ "learning_rate": 8.08324467416155e-07,
18031
+ "loss": 82.2398,
18032
+ "step": 2545
18033
+ },
18034
+ {
18035
+ "epoch": 1.7458222641186047,
18036
+ "grad_norm": 0.6454034447669983,
18037
+ "learning_rate": 7.933061162222921e-07,
18038
+ "loss": 82.2615,
18039
+ "step": 2546
18040
+ },
18041
+ {
18042
+ "epoch": 1.746507841288885,
18043
+ "grad_norm": 0.533258855342865,
18044
+ "learning_rate": 7.784280383382192e-07,
18045
+ "loss": 82.2377,
18046
+ "step": 2547
18047
+ },
18048
+ {
18049
+ "epoch": 1.7471934184591653,
18050
+ "grad_norm": 0.644951343536377,
18051
+ "learning_rate": 7.636902548008085e-07,
18052
+ "loss": 82.174,
18053
+ "step": 2548
18054
+ },
18055
+ {
18056
+ "epoch": 1.7478789956294456,
18057
+ "grad_norm": 0.7034481763839722,
18058
+ "learning_rate": 7.490927864485464e-07,
18059
+ "loss": 82.1811,
18060
+ "step": 2549
18061
+ },
18062
+ {
18063
+ "epoch": 1.7485645727997259,
18064
+ "grad_norm": 0.7296063899993896,
18065
+ "learning_rate": 7.346356539215116e-07,
18066
+ "loss": 82.2469,
18067
+ "step": 2550
18068
+ },
18069
+ {
18070
+ "epoch": 1.7492501499700062,
18071
+ "grad_norm": 0.701720118522644,
18072
+ "learning_rate": 7.203188776613745e-07,
18073
+ "loss": 82.1859,
18074
+ "step": 2551
18075
+ },
18076
+ {
18077
+ "epoch": 1.7499357271402862,
18078
+ "grad_norm": 0.6307011842727661,
18079
+ "learning_rate": 7.061424779113424e-07,
18080
+ "loss": 82.2474,
18081
+ "step": 2552
18082
+ },
18083
+ {
18084
+ "epoch": 1.7506213043105665,
18085
+ "grad_norm": 0.6798613667488098,
18086
+ "learning_rate": 6.921064747161476e-07,
18087
+ "loss": 82.1944,
18088
+ "step": 2553
18089
+ },
18090
+ {
18091
+ "epoch": 1.7513068814808466,
18092
+ "grad_norm": 0.6421661972999573,
18093
+ "learning_rate": 6.782108879219817e-07,
18094
+ "loss": 82.2346,
18095
+ "step": 2554
18096
+ },
18097
+ {
18098
+ "epoch": 1.7519924586511268,
18099
+ "grad_norm": 0.7070729732513428,
18100
+ "learning_rate": 6.64455737176517e-07,
18101
+ "loss": 82.2537,
18102
+ "step": 2555
18103
+ },
18104
+ {
18105
+ "epoch": 1.7526780358214071,
18106
+ "grad_norm": 0.60176682472229,
18107
+ "learning_rate": 6.508410419288513e-07,
18108
+ "loss": 82.1744,
18109
+ "step": 2556
18110
+ },
18111
+ {
18112
+ "epoch": 1.7533636129916874,
18113
+ "grad_norm": 0.7718641757965088,
18114
+ "learning_rate": 6.373668214294859e-07,
18115
+ "loss": 82.2105,
18116
+ "step": 2557
18117
+ },
18118
+ {
18119
+ "epoch": 1.7540491901619677,
18120
+ "grad_norm": 0.8797594308853149,
18121
+ "learning_rate": 6.240330947302808e-07,
18122
+ "loss": 82.2068,
18123
+ "step": 2558
18124
+ },
18125
+ {
18126
+ "epoch": 1.754734767332248,
18127
+ "grad_norm": 0.7588513493537903,
18128
+ "learning_rate": 6.108398806844662e-07,
18129
+ "loss": 82.2615,
18130
+ "step": 2559
18131
+ },
18132
+ {
18133
+ "epoch": 1.755420344502528,
18134
+ "grad_norm": 0.6733604073524475,
18135
+ "learning_rate": 5.977871979465977e-07,
18136
+ "loss": 82.2358,
18137
+ "step": 2560
18138
+ },
18139
+ {
18140
+ "epoch": 1.7561059216728083,
18141
+ "grad_norm": 0.5898498892784119,
18142
+ "learning_rate": 5.848750649725121e-07,
18143
+ "loss": 82.1374,
18144
+ "step": 2561
18145
+ },
18146
+ {
18147
+ "epoch": 1.7567914988430884,
18148
+ "grad_norm": 0.7103486657142639,
18149
+ "learning_rate": 5.721035000193165e-07,
18150
+ "loss": 82.2805,
18151
+ "step": 2562
18152
+ },
18153
+ {
18154
+ "epoch": 1.7574770760133687,
18155
+ "grad_norm": 0.8472493886947632,
18156
+ "learning_rate": 5.594725211453655e-07,
18157
+ "loss": 82.1691,
18158
+ "step": 2563
18159
+ },
18160
+ {
18161
+ "epoch": 1.758162653183649,
18162
+ "grad_norm": 0.6238364577293396,
18163
+ "learning_rate": 5.469821462102398e-07,
18164
+ "loss": 82.1667,
18165
+ "step": 2564
18166
+ },
18167
+ {
18168
+ "epoch": 1.7588482303539292,
18169
+ "grad_norm": 0.6626426577568054,
18170
+ "learning_rate": 5.34632392874701e-07,
18171
+ "loss": 82.2053,
18172
+ "step": 2565
18173
+ },
18174
+ {
18175
+ "epoch": 1.7595338075242095,
18176
+ "grad_norm": 0.76328045129776,
18177
+ "learning_rate": 5.224232786006811e-07,
18178
+ "loss": 82.1946,
18179
+ "step": 2566
18180
+ },
18181
+ {
18182
+ "epoch": 1.7602193846944898,
18183
+ "grad_norm": 0.695488691329956,
18184
+ "learning_rate": 5.103548206512487e-07,
18185
+ "loss": 82.1913,
18186
+ "step": 2567
18187
+ },
18188
+ {
18189
+ "epoch": 1.7609049618647699,
18190
+ "grad_norm": 0.6686795353889465,
18191
+ "learning_rate": 4.984270360906207e-07,
18192
+ "loss": 82.2561,
18193
+ "step": 2568
18194
+ },
18195
+ {
18196
+ "epoch": 1.7615905390350501,
18197
+ "grad_norm": 0.6828111410140991,
18198
+ "learning_rate": 4.866399417840839e-07,
18199
+ "loss": 82.2495,
18200
+ "step": 2569
18201
+ },
18202
+ {
18203
+ "epoch": 1.7622761162053302,
18204
+ "grad_norm": 0.6867351531982422,
18205
+ "learning_rate": 4.7499355439798443e-07,
18206
+ "loss": 82.2579,
18207
+ "step": 2570
18208
+ },
18209
+ {
18210
+ "epoch": 1.7629616933756105,
18211
+ "grad_norm": 0.8694186806678772,
18212
+ "learning_rate": 4.6348789039974973e-07,
18213
+ "loss": 82.2177,
18214
+ "step": 2571
18215
+ },
18216
+ {
18217
+ "epoch": 1.7636472705458908,
18218
+ "grad_norm": 0.823703408241272,
18219
+ "learning_rate": 4.521229660578108e-07,
18220
+ "loss": 82.1858,
18221
+ "step": 2572
18222
+ },
18223
+ {
18224
+ "epoch": 1.764332847716171,
18225
+ "grad_norm": 0.6795697212219238,
18226
+ "learning_rate": 4.4089879744160234e-07,
18227
+ "loss": 82.2448,
18228
+ "step": 2573
18229
+ },
18230
+ {
18231
+ "epoch": 1.7650184248864513,
18232
+ "grad_norm": 0.6945237517356873,
18233
+ "learning_rate": 4.2981540042152934e-07,
18234
+ "loss": 82.2162,
18235
+ "step": 2574
18236
+ },
18237
+ {
18238
+ "epoch": 1.7657040020567316,
18239
+ "grad_norm": 0.7060695290565491,
18240
+ "learning_rate": 4.188727906689782e-07,
18241
+ "loss": 82.2068,
18242
+ "step": 2575
18243
+ },
18244
+ {
18245
+ "epoch": 1.766389579227012,
18246
+ "grad_norm": 0.7274417877197266,
18247
+ "learning_rate": 4.080709836562391e-07,
18248
+ "loss": 82.2307,
18249
+ "step": 2576
18250
+ },
18251
+ {
18252
+ "epoch": 1.767075156397292,
18253
+ "grad_norm": 0.6976255774497986,
18254
+ "learning_rate": 3.9740999465653904e-07,
18255
+ "loss": 82.1747,
18256
+ "step": 2577
18257
+ },
18258
+ {
18259
+ "epoch": 1.7677607335675722,
18260
+ "grad_norm": 0.8024837374687195,
18261
+ "learning_rate": 3.8688983874396454e-07,
18262
+ "loss": 82.2583,
18263
+ "step": 2578
18264
+ },
18265
+ {
18266
+ "epoch": 1.7684463107378523,
18267
+ "grad_norm": 0.825741171836853,
18268
+ "learning_rate": 3.7651053079350575e-07,
18269
+ "loss": 82.2569,
18270
+ "step": 2579
18271
+ },
18272
+ {
18273
+ "epoch": 1.7691318879081326,
18274
+ "grad_norm": 0.8342203497886658,
18275
+ "learning_rate": 3.6627208548097866e-07,
18276
+ "loss": 82.1863,
18277
+ "step": 2580
18278
+ },
18279
+ {
18280
+ "epoch": 1.7698174650784129,
18281
+ "grad_norm": 0.7146753668785095,
18282
+ "learning_rate": 3.561745172830477e-07,
18283
+ "loss": 82.2755,
18284
+ "step": 2581
18285
+ },
18286
+ {
18287
+ "epoch": 1.7705030422486931,
18288
+ "grad_norm": 0.6945064067840576,
18289
+ "learning_rate": 3.462178404771477e-07,
18290
+ "loss": 82.2087,
18291
+ "step": 2582
18292
+ },
18293
+ {
18294
+ "epoch": 1.7711886194189734,
18295
+ "grad_norm": 0.7338677048683167,
18296
+ "learning_rate": 3.3640206914153927e-07,
18297
+ "loss": 82.2696,
18298
+ "step": 2583
18299
+ },
18300
+ {
18301
+ "epoch": 1.7718741965892537,
18302
+ "grad_norm": 0.6690239906311035,
18303
+ "learning_rate": 3.267272171552316e-07,
18304
+ "loss": 82.2093,
18305
+ "step": 2584
18306
+ },
18307
+ {
18308
+ "epoch": 1.7725597737595338,
18309
+ "grad_norm": 0.9962079524993896,
18310
+ "learning_rate": 3.171932981979708e-07,
18311
+ "loss": 82.2489,
18312
+ "step": 2585
18313
+ },
18314
+ {
18315
+ "epoch": 1.773245350929814,
18316
+ "grad_norm": 0.7202107310295105,
18317
+ "learning_rate": 3.0780032575025155e-07,
18318
+ "loss": 82.1385,
18319
+ "step": 2586
18320
+ },
18321
+ {
18322
+ "epoch": 1.7739309281000941,
18323
+ "grad_norm": 0.7426759600639343,
18324
+ "learning_rate": 2.9854831309327204e-07,
18325
+ "loss": 82.2194,
18326
+ "step": 2587
18327
+ },
18328
+ {
18329
+ "epoch": 1.7746165052703744,
18330
+ "grad_norm": 0.5640959143638611,
18331
+ "learning_rate": 2.8943727330890125e-07,
18332
+ "loss": 82.2244,
18333
+ "step": 2588
18334
+ },
18335
+ {
18336
+ "epoch": 1.7753020824406547,
18337
+ "grad_norm": 0.7230386734008789,
18338
+ "learning_rate": 2.80467219279712e-07,
18339
+ "loss": 82.1554,
18340
+ "step": 2589
18341
+ },
18342
+ {
18343
+ "epoch": 1.775987659610935,
18344
+ "grad_norm": 0.6880918145179749,
18345
+ "learning_rate": 2.7163816368890314e-07,
18346
+ "loss": 82.2226,
18347
+ "step": 2590
18348
+ },
18349
+ {
18350
+ "epoch": 1.7766732367812152,
18351
+ "grad_norm": 0.8692451119422913,
18352
+ "learning_rate": 2.6295011902031097e-07,
18353
+ "loss": 82.2665,
18354
+ "step": 2591
18355
+ },
18356
+ {
18357
+ "epoch": 1.7773588139514955,
18358
+ "grad_norm": 0.7101627588272095,
18359
+ "learning_rate": 2.5440309755839775e-07,
18360
+ "loss": 82.2309,
18361
+ "step": 2592
18362
+ },
18363
+ {
18364
+ "epoch": 1.7780443911217756,
18365
+ "grad_norm": 0.7459553480148315,
18366
+ "learning_rate": 2.459971113882409e-07,
18367
+ "loss": 82.2684,
18368
+ "step": 2593
18369
+ },
18370
+ {
18371
+ "epoch": 1.7787299682920559,
18372
+ "grad_norm": 0.6015447974205017,
18373
+ "learning_rate": 2.377321723954773e-07,
18374
+ "loss": 82.2359,
18375
+ "step": 2594
18376
+ },
18377
+ {
18378
+ "epoch": 1.7794155454623362,
18379
+ "grad_norm": 0.825840175151825,
18380
+ "learning_rate": 2.2960829226631452e-07,
18381
+ "loss": 82.2056,
18382
+ "step": 2595
18383
+ },
18384
+ {
18385
+ "epoch": 1.7801011226326162,
18386
+ "grad_norm": 0.7459067106246948,
18387
+ "learning_rate": 2.216254824875197e-07,
18388
+ "loss": 82.218,
18389
+ "step": 2596
18390
+ },
18391
+ {
18392
+ "epoch": 1.7807866998028965,
18393
+ "grad_norm": 0.6501146554946899,
18394
+ "learning_rate": 2.13783754346375e-07,
18395
+ "loss": 82.1983,
18396
+ "step": 2597
18397
+ },
18398
+ {
18399
+ "epoch": 1.7814722769731768,
18400
+ "grad_norm": 0.6280929446220398,
18401
+ "learning_rate": 2.060831189307e-07,
18402
+ "loss": 82.2458,
18403
+ "step": 2598
18404
+ },
18405
+ {
18406
+ "epoch": 1.782157854143457,
18407
+ "grad_norm": 0.7218676209449768,
18408
+ "learning_rate": 1.9852358712880713e-07,
18409
+ "loss": 82.2687,
18410
+ "step": 2599
18411
+ },
18412
+ {
18413
+ "epoch": 1.7828434313137373,
18414
+ "grad_norm": 0.6138923168182373,
18415
+ "learning_rate": 1.9110516962950186e-07,
18416
+ "loss": 82.2733,
18417
+ "step": 2600
18418
+ },
18419
+ {
18420
+ "epoch": 1.7828434313137373,
18421
+ "eval_loss": 10.27136516571045,
18422
+ "eval_runtime": 5.7783,
18423
+ "eval_samples_per_second": 336.607,
18424
+ "eval_steps_per_second": 84.282,
18425
+ "step": 2600
18426
  }
18427
  ],
18428
  "logging_steps": 1,
 
18451
  "attributes": {}
18452
  }
18453
  },
18454
+ "total_flos": 1074706513920000.0,
18455
  "train_batch_size": 4,
18456
  "trial_name": null,
18457
  "trial_params": null