{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.995306228883699,
  "eval_steps": 500,
  "global_step": 1032,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029024285163601733,
      "grad_norm": 88.48429870605469,
      "learning_rate": 9.677419354838708e-05,
      "loss": 0.7958,
      "step": 10
    },
    {
      "epoch": 0.058048570327203466,
      "grad_norm": 78.84452056884766,
      "learning_rate": 0.00019354838709677416,
      "loss": 0.7504,
      "step": 20
    },
    {
      "epoch": 0.08707285549080519,
      "grad_norm": 213.83749389648438,
      "learning_rate": 0.00029032258064516127,
      "loss": 0.7391,
      "step": 30
    },
    {
      "epoch": 0.11609714065440693,
      "grad_norm": 64.08855438232422,
      "learning_rate": 0.00029994016586766087,
      "loss": 0.7567,
      "step": 40
    },
    {
      "epoch": 0.14512142581800866,
      "grad_norm": 71.85431671142578,
      "learning_rate": 0.00029973339311370587,
      "loss": 0.7117,
      "step": 50
    },
    {
      "epoch": 0.17414571098161039,
      "grad_norm": 66.32382202148438,
      "learning_rate": 0.00029937914664890375,
      "loss": 0.6959,
      "step": 60
    },
    {
      "epoch": 0.20316999614521214,
      "grad_norm": 52.99678039550781,
      "learning_rate": 0.00029887777537365414,
      "loss": 0.6835,
      "step": 70
    },
    {
      "epoch": 0.23219428130881387,
      "grad_norm": 53.15193557739258,
      "learning_rate": 0.0002982297730928522,
      "loss": 0.6855,
      "step": 80
    },
    {
      "epoch": 0.2612185664724156,
      "grad_norm": 62.969337463378906,
      "learning_rate": 0.00029743577802953563,
      "loss": 0.6758,
      "step": 90
    },
    {
      "epoch": 0.2902428516360173,
      "grad_norm": 47.597293853759766,
      "learning_rate": 0.00029649657219629316,
      "loss": 0.665,
      "step": 100
    },
    {
      "epoch": 0.31926713679961904,
      "grad_norm": 50.93095397949219,
      "learning_rate": 0.00029541308062505385,
      "loss": 0.6689,
      "step": 110
    },
    {
      "epoch": 0.34829142196322077,
      "grad_norm": 44.195335388183594,
      "learning_rate": 0.00029418637045601514,
      "loss": 0.6553,
      "step": 120
    },
    {
      "epoch": 0.3773157071268225,
      "grad_norm": 46.52369689941406,
      "learning_rate": 0.00029281764988660705,
      "loss": 0.6584,
      "step": 130
    },
    {
      "epoch": 0.4063399922904243,
      "grad_norm": 51.798343658447266,
      "learning_rate": 0.0002913082669815285,
      "loss": 0.6514,
      "step": 140
    },
    {
      "epoch": 0.435364277454026,
      "grad_norm": 53.8443489074707,
      "learning_rate": 0.0002896597083450262,
      "loss": 0.6276,
      "step": 150
    },
    {
      "epoch": 0.46438856261762773,
      "grad_norm": 44.94770812988281,
      "learning_rate": 0.0002878735976567259,
      "loss": 0.6428,
      "step": 160
    },
    {
      "epoch": 0.49341284778122946,
      "grad_norm": 38.52789306640625,
      "learning_rate": 0.0002859516940724558,
      "loss": 0.6415,
      "step": 170
    },
    {
      "epoch": 0.5224371329448312,
      "grad_norm": 52.5710563659668,
      "learning_rate": 0.0002838958904916392,
      "loss": 0.6302,
      "step": 180
    },
    {
      "epoch": 0.551461418108433,
      "grad_norm": 46.27107238769531,
      "learning_rate": 0.00028170821169296126,
      "loss": 0.6246,
      "step": 190
    },
    {
      "epoch": 0.5804857032720346,
      "grad_norm": 42.310123443603516,
      "learning_rate": 0.00027939081234014705,
      "loss": 0.627,
      "step": 200
    },
    {
      "epoch": 0.6095099884356364,
      "grad_norm": 48.09523391723633,
      "learning_rate": 0.0002769459748598149,
      "loss": 0.623,
      "step": 210
    },
    {
      "epoch": 0.6385342735992381,
      "grad_norm": 62.250152587890625,
      "learning_rate": 0.0002743761071934942,
      "loss": 0.6312,
      "step": 220
    },
    {
      "epoch": 0.6675585587628399,
      "grad_norm": 42.713130950927734,
      "learning_rate": 0.00027168374042602366,
      "loss": 0.6101,
      "step": 230
    },
    {
      "epoch": 0.6965828439264415,
      "grad_norm": 49.83562469482422,
      "learning_rate": 0.00026887152629266354,
      "loss": 0.6,
      "step": 240
    },
    {
      "epoch": 0.7256071290900433,
      "grad_norm": 39.01671600341797,
      "learning_rate": 0.0002659422345673789,
      "loss": 0.6038,
      "step": 250
    },
    {
      "epoch": 0.754631414253645,
      "grad_norm": 35.13432693481445,
      "learning_rate": 0.0002628987503348651,
      "loss": 0.5956,
      "step": 260
    },
    {
      "epoch": 0.7836556994172468,
      "grad_norm": 41.503684997558594,
      "learning_rate": 0.00025974407114900353,
      "loss": 0.6134,
      "step": 270
    },
    {
      "epoch": 0.8126799845808486,
      "grad_norm": 39.328548431396484,
      "learning_rate": 0.0002564813040805443,
      "loss": 0.59,
      "step": 280
    },
    {
      "epoch": 0.8417042697444502,
      "grad_norm": 34.63987731933594,
      "learning_rate": 0.0002531136626569259,
      "loss": 0.5834,
      "step": 290
    },
    {
      "epoch": 0.870728554908052,
      "grad_norm": 37.82402801513672,
      "learning_rate": 0.0002496444636972439,
      "loss": 0.6023,
      "step": 300
    },
    {
      "epoch": 0.8997528400716537,
      "grad_norm": 38.01532745361328,
      "learning_rate": 0.0002460771240454877,
      "loss": 0.5866,
      "step": 310
    },
    {
      "epoch": 0.9287771252352555,
      "grad_norm": 37.758487701416016,
      "learning_rate": 0.00024241515720526083,
      "loss": 0.6001,
      "step": 320
    },
    {
      "epoch": 0.9578014103988571,
      "grad_norm": 34.032989501953125,
      "learning_rate": 0.0002386621698793015,
      "loss": 0.5833,
      "step": 330
    },
    {
      "epoch": 0.9868256955624589,
      "grad_norm": 41.784881591796875,
      "learning_rate": 0.0002348218584172095,
      "loss": 0.5876,
      "step": 340
    },
    {
      "epoch": 1.0158499807260606,
      "grad_norm": 35.09678268432617,
      "learning_rate": 0.00023089800517487986,
      "loss": 0.5319,
      "step": 350
    },
    {
      "epoch": 1.0448742658896624,
      "grad_norm": 32.305877685546875,
      "learning_rate": 0.00022689447478922784,
      "loss": 0.4666,
      "step": 360
    },
    {
      "epoch": 1.0738985510532641,
      "grad_norm": 35.80933380126953,
      "learning_rate": 0.0002228152103718745,
      "loss": 0.4619,
      "step": 370
    },
    {
      "epoch": 1.102922836216866,
      "grad_norm": 32.89548873901367,
      "learning_rate": 0.00021866422962554238,
      "loss": 0.4739,
      "step": 380
    },
    {
      "epoch": 1.1319471213804675,
      "grad_norm": 36.34146499633789,
      "learning_rate": 0.0002144456208869851,
      "loss": 0.4676,
      "step": 390
    },
    {
      "epoch": 1.1609714065440693,
      "grad_norm": 42.522438049316406,
      "learning_rate": 0.00021016353910034938,
      "loss": 0.4765,
      "step": 400
    },
    {
      "epoch": 1.189995691707671,
      "grad_norm": 34.677650451660156,
      "learning_rate": 0.00020582220172493467,
      "loss": 0.4715,
      "step": 410
    },
    {
      "epoch": 1.2190199768712728,
      "grad_norm": 33.74694061279297,
      "learning_rate": 0.0002014258845813811,
      "loss": 0.4655,
      "step": 420
    },
    {
      "epoch": 1.2480442620348744,
      "grad_norm": 30.60100555419922,
      "learning_rate": 0.00019697891764037685,
      "loss": 0.461,
      "step": 430
    },
    {
      "epoch": 1.2770685471984762,
      "grad_norm": 38.6037483215332,
      "learning_rate": 0.00019248568075803257,
      "loss": 0.4719,
      "step": 440
    },
    {
      "epoch": 1.306092832362078,
      "grad_norm": 32.19020080566406,
      "learning_rate": 0.00018795059936212348,
      "loss": 0.4586,
      "step": 450
    },
    {
      "epoch": 1.3351171175256797,
      "grad_norm": 32.962276458740234,
      "learning_rate": 0.00018337814009344714,
      "loss": 0.4697,
      "step": 460
    },
    {
      "epoch": 1.3641414026892815,
      "grad_norm": 29.69386863708496,
      "learning_rate": 0.00017877280640659068,
      "loss": 0.4639,
      "step": 470
    },
    {
      "epoch": 1.393165687852883,
      "grad_norm": 31.52634620666504,
      "learning_rate": 0.00017413913413443915,
      "loss": 0.4579,
      "step": 480
    },
    {
      "epoch": 1.4221899730164849,
      "grad_norm": 35.30017852783203,
      "learning_rate": 0.0001694816870207949,
      "loss": 0.4684,
      "step": 490
    },
    {
      "epoch": 1.4512142581800866,
      "grad_norm": 33.88492202758789,
      "learning_rate": 0.00016480505222550682,
      "loss": 0.4534,
      "step": 500
    },
    {
      "epoch": 1.4802385433436884,
      "grad_norm": 30.00653076171875,
      "learning_rate": 0.00016011383580653697,
      "loss": 0.464,
      "step": 510
    },
    {
      "epoch": 1.50926282850729,
      "grad_norm": 33.75349807739258,
      "learning_rate": 0.00015541265818341433,
      "loss": 0.4497,
      "step": 520
    },
    {
      "epoch": 1.5382871136708918,
      "grad_norm": 31.689538955688477,
      "learning_rate": 0.00015070614958654393,
      "loss": 0.4412,
      "step": 530
    },
    {
      "epoch": 1.5673113988344936,
      "grad_norm": 28.848291397094727,
      "learning_rate": 0.00014599894549685273,
      "loss": 0.4467,
      "step": 540
    },
    {
      "epoch": 1.5963356839980953,
      "grad_norm": 27.079084396362305,
      "learning_rate": 0.0001412956820802647,
      "loss": 0.4428,
      "step": 550
    },
    {
      "epoch": 1.6253599691616971,
      "grad_norm": 29.99922752380371,
      "learning_rate": 0.0001366009916215007,
      "loss": 0.4374,
      "step": 560
    },
    {
      "epoch": 1.654384254325299,
      "grad_norm": 28.763559341430664,
      "learning_rate": 0.00013191949796170156,
      "loss": 0.4419,
      "step": 570
    },
    {
      "epoch": 1.6834085394889005,
      "grad_norm": 30.430801391601562,
      "learning_rate": 0.00012725581194436694,
      "loss": 0.445,
      "step": 580
    },
    {
      "epoch": 1.7124328246525022,
      "grad_norm": 28.43861198425293,
      "learning_rate": 0.00012261452687409576,
      "loss": 0.4452,
      "step": 590
    },
    {
      "epoch": 1.7414571098161038,
      "grad_norm": 33.317378997802734,
      "learning_rate": 0.00011800021399260094,
      "loss": 0.4378,
      "step": 600
    },
    {
      "epoch": 1.7704813949797056,
      "grad_norm": 27.84680938720703,
      "learning_rate": 0.00011341741797645384,
      "loss": 0.4375,
      "step": 610
    },
    {
      "epoch": 1.7995056801433074,
      "grad_norm": 32.20744705200195,
      "learning_rate": 0.0001088706524609933,
      "loss": 0.4281,
      "step": 620
    },
    {
      "epoch": 1.8285299653069091,
      "grad_norm": 29.68756675720215,
      "learning_rate": 0.00010436439559480705,
      "loss": 0.4338,
      "step": 630
    },
    {
      "epoch": 1.857554250470511,
      "grad_norm": 31.973575592041016,
      "learning_rate": 9.990308562916479e-05,
      "loss": 0.4265,
      "step": 640
    },
    {
      "epoch": 1.8865785356341127,
      "grad_norm": 26.948545455932617,
      "learning_rate": 9.549111654674586e-05,
      "loss": 0.4165,
      "step": 650
    },
    {
      "epoch": 1.9156028207977145,
      "grad_norm": 27.91978645324707,
      "learning_rate": 9.11328337339681e-05,
      "loss": 0.416,
      "step": 660
    },
    {
      "epoch": 1.944627105961316,
      "grad_norm": 34.58734130859375,
      "learning_rate": 8.68325297011791e-05,
      "loss": 0.4196,
      "step": 670
    },
    {
      "epoch": 1.9736513911249178,
      "grad_norm": 24.959909439086914,
      "learning_rate": 8.259443985492576e-05,
      "loss": 0.4305,
      "step": 680
    },
    {
      "epoch": 2.0026756762885194,
      "grad_norm": 39.029258728027344,
      "learning_rate": 7.842273832646591e-05,
      "loss": 0.4122,
      "step": 690
    },
    {
      "epoch": 2.031699961452121,
      "grad_norm": 27.386505126953125,
      "learning_rate": 7.432153386063034e-05,
      "loss": 0.2751,
      "step": 700
    },
    {
      "epoch": 2.060724246615723,
      "grad_norm": 30.209821701049805,
      "learning_rate": 7.029486576908444e-05,
      "loss": 0.2654,
      "step": 710
    },
    {
      "epoch": 2.0897485317793247,
      "grad_norm": 31.79279327392578,
      "learning_rate": 6.63466999519756e-05,
      "loss": 0.2648,
      "step": 720
    },
    {
      "epoch": 2.1187728169429265,
      "grad_norm": 31.363250732421875,
      "learning_rate": 6.248092499188372e-05,
      "loss": 0.2587,
      "step": 730
    },
    {
      "epoch": 2.1477971021065283,
      "grad_norm": 33.62345886230469,
      "learning_rate": 5.870134832392269e-05,
      "loss": 0.2564,
      "step": 740
    },
    {
      "epoch": 2.17682138727013,
      "grad_norm": 31.332040786743164,
      "learning_rate": 5.5011692485764734e-05,
      "loss": 0.253,
      "step": 750
    },
    {
      "epoch": 2.205845672433732,
      "grad_norm": 30.034757614135742,
      "learning_rate": 5.141559145128093e-05,
      "loss": 0.26,
      "step": 760
    },
    {
      "epoch": 2.234869957597333,
      "grad_norm": 30.40983772277832,
      "learning_rate": 4.791658705140897e-05,
      "loss": 0.2507,
      "step": 770
    },
    {
      "epoch": 2.263894242760935,
      "grad_norm": 27.134634017944336,
      "learning_rate": 4.451812548577333e-05,
      "loss": 0.2518,
      "step": 780
    },
    {
      "epoch": 2.2929185279245368,
      "grad_norm": 27.9604434967041,
      "learning_rate": 4.1223553928493564e-05,
      "loss": 0.2494,
      "step": 790
    },
    {
      "epoch": 2.3219428130881385,
      "grad_norm": 33.73405838012695,
      "learning_rate": 3.803611723152345e-05,
      "loss": 0.2441,
      "step": 800
    },
    {
      "epoch": 2.3509670982517403,
      "grad_norm": 31.413331985473633,
      "learning_rate": 3.495895472876854e-05,
      "loss": 0.2479,
      "step": 810
    },
    {
      "epoch": 2.379991383415342,
      "grad_norm": 28.82455062866211,
      "learning_rate": 3.199509714412901e-05,
      "loss": 0.2529,
      "step": 820
    },
    {
      "epoch": 2.409015668578944,
      "grad_norm": 31.402931213378906,
      "learning_rate": 2.9147463606513528e-05,
      "loss": 0.2499,
      "step": 830
    },
    {
      "epoch": 2.4380399537425457,
      "grad_norm": 25.637739181518555,
      "learning_rate": 2.6418858774763992e-05,
      "loss": 0.236,
      "step": 840
    },
    {
      "epoch": 2.467064238906147,
      "grad_norm": 27.47572898864746,
      "learning_rate": 2.38119700753228e-05,
      "loss": 0.2432,
      "step": 850
    },
    {
      "epoch": 2.496088524069749,
      "grad_norm": 28.527973175048828,
      "learning_rate": 2.1329365055363595e-05,
      "loss": 0.2428,
      "step": 860
    },
    {
      "epoch": 2.5251128092333506,
      "grad_norm": 28.3017578125,
      "learning_rate": 1.89734888539916e-05,
      "loss": 0.2457,
      "step": 870
    },
    {
      "epoch": 2.5541370943969524,
      "grad_norm": 27.692001342773438,
      "learning_rate": 1.674666179400504e-05,
      "loss": 0.2409,
      "step": 880
    },
    {
      "epoch": 2.583161379560554,
      "grad_norm": 30.592241287231445,
      "learning_rate": 1.4651077096589486e-05,
      "loss": 0.2371,
      "step": 890
    },
    {
      "epoch": 2.612185664724156,
      "grad_norm": 26.051584243774414,
      "learning_rate": 1.2688798721195053e-05,
      "loss": 0.2389,
      "step": 900
    },
    {
      "epoch": 2.6412099498877577,
      "grad_norm": 28.38836097717285,
      "learning_rate": 1.086175933272514e-05,
      "loss": 0.2407,
      "step": 910
    },
    {
      "epoch": 2.6702342350513595,
      "grad_norm": 27.81374740600586,
      "learning_rate": 9.171758398038015e-06,
      "loss": 0.2389,
      "step": 920
    },
    {
      "epoch": 2.6992585202149613,
      "grad_norm": 27.540956497192383,
      "learning_rate": 7.620460413636342e-06,
      "loss": 0.2453,
      "step": 930
    },
    {
      "epoch": 2.728282805378563,
      "grad_norm": 27.374300003051758,
      "learning_rate": 6.209393266290291e-06,
      "loss": 0.234,
      "step": 940
    },
    {
      "epoch": 2.757307090542165,
      "grad_norm": 29.071474075317383,
      "learning_rate": 4.939946728208627e-06,
      "loss": 0.2406,
      "step": 950
    },
    {
      "epoch": 2.786331375705766,
      "grad_norm": 25.93909454345703,
      "learning_rate": 3.813371088240086e-06,
      "loss": 0.231,
      "step": 960
    },
    {
      "epoch": 2.815355660869368,
      "grad_norm": 28.83918571472168,
      "learning_rate": 2.830775920453093e-06,
      "loss": 0.2303,
      "step": 970
    },
    {
      "epoch": 2.8443799460329697,
      "grad_norm": 28.06920623779297,
      "learning_rate": 1.9931289913066694e-06,
      "loss": 0.2339,
      "step": 980
    },
    {
      "epoch": 2.8734042311965715,
      "grad_norm": 28.357439041137695,
      "learning_rate": 1.3012553064889631e-06,
      "loss": 0.2325,
      "step": 990
    },
    {
      "epoch": 2.9024285163601733,
      "grad_norm": 25.29115104675293,
      "learning_rate": 7.558362983619448e-07,
      "loss": 0.2374,
      "step": 1000
    },
    {
      "epoch": 2.931452801523775,
      "grad_norm": 27.02465057373047,
      "learning_rate": 3.57409154812871e-07,
      "loss": 0.2307,
      "step": 1010
    },
    {
      "epoch": 2.960477086687377,
      "grad_norm": 26.2918701171875,
      "learning_rate": 1.0636629017320431e-07,
      "loss": 0.232,
      "step": 1020
    },
    {
      "epoch": 2.989501371850978,
      "grad_norm": 28.43804359436035,
      "learning_rate": 2.9549587264754428e-09,
      "loss": 0.2287,
      "step": 1030
    },
    {
      "epoch": 2.995306228883699,
      "step": 1032,
      "total_flos": 1.0711204212442399e+18,
      "train_loss": 0.44727156865735385,
      "train_runtime": 21178.1386,
      "train_samples_per_second": 6.247,
      "train_steps_per_second": 0.049
    }
  ],
  "logging_steps": 10,
  "max_steps": 1032,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0711204212442399e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}