{
"best_metric": 0.21225065,
"best_model_checkpoint": "/workspace/output/molmo-7b-d/v1-20250103-233013/checkpoint-414",
"epoch": 3.0,
"eval_steps": 200,
"global_step": 414,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.93268561,
"epoch": 0.007272727272727273,
"grad_norm": 4.360905168604235,
"learning_rate": 0.0,
"loss": 0.15919656,
"memory(GiB)": 131.1,
"step": 1,
"train_speed(iter/s)": 0.051814
},
{
"acc": 0.93592656,
"epoch": 0.03636363636363636,
"grad_norm": 5.722024176509264,
"learning_rate": 4.829949384917788e-06,
"loss": 0.16946605,
"memory(GiB)": 131.7,
"step": 5,
"train_speed(iter/s)": 0.164947
},
{
"acc": 0.92745094,
"epoch": 0.07272727272727272,
"grad_norm": 5.449760109713864,
"learning_rate": 6.910095361682884e-06,
"loss": 0.19423571,
"memory(GiB)": 131.7,
"step": 10,
"train_speed(iter/s)": 0.228063
},
{
"acc": 0.9191308,
"epoch": 0.10909090909090909,
"grad_norm": 5.591882854054257,
"learning_rate": 8.126902754116446e-06,
"loss": 0.2132081,
"memory(GiB)": 131.7,
"step": 15,
"train_speed(iter/s)": 0.262462
},
{
"acc": 0.91665707,
"epoch": 0.14545454545454545,
"grad_norm": 8.643333147328232,
"learning_rate": 8.990241338447979e-06,
"loss": 0.2489913,
"memory(GiB)": 132.85,
"step": 20,
"train_speed(iter/s)": 0.283686
},
{
"acc": 0.92767944,
"epoch": 0.18181818181818182,
"grad_norm": 5.521300358752013,
"learning_rate": 9.659898769835576e-06,
"loss": 0.20275159,
"memory(GiB)": 100.16,
"step": 25,
"train_speed(iter/s)": 0.297772
},
{
"acc": 0.91237392,
"epoch": 0.21818181818181817,
"grad_norm": 4.691287833576054,
"learning_rate": 9.999635040777627e-06,
"loss": 0.24152677,
"memory(GiB)": 100.16,
"step": 30,
"train_speed(iter/s)": 0.306552
},
{
"acc": 0.89811802,
"epoch": 0.2545454545454545,
"grad_norm": 2.3267326177072176,
"learning_rate": 9.995529861428146e-06,
"loss": 0.2682821,
"memory(GiB)": 100.16,
"step": 35,
"train_speed(iter/s)": 0.312152
},
{
"acc": 0.91184635,
"epoch": 0.2909090909090909,
"grad_norm": 3.241573273096398,
"learning_rate": 9.986867061882612e-06,
"loss": 0.23578806,
"memory(GiB)": 100.16,
"step": 40,
"train_speed(iter/s)": 0.314978
},
{
"acc": 0.9012291,
"epoch": 0.32727272727272727,
"grad_norm": 2.3552152207973713,
"learning_rate": 9.973654546348053e-06,
"loss": 0.25761139,
"memory(GiB)": 100.16,
"step": 45,
"train_speed(iter/s)": 0.319208
},
{
"acc": 0.9016325,
"epoch": 0.36363636363636365,
"grad_norm": 3.1153181076119703,
"learning_rate": 9.955904370333514e-06,
"loss": 0.24715631,
"memory(GiB)": 100.16,
"step": 50,
"train_speed(iter/s)": 0.32296
},
{
"acc": 0.89749699,
"epoch": 0.4,
"grad_norm": 2.4498466601081943,
"learning_rate": 9.933632729650212e-06,
"loss": 0.25689688,
"memory(GiB)": 100.16,
"step": 55,
"train_speed(iter/s)": 0.325846
},
{
"acc": 0.88724833,
"epoch": 0.43636363636363634,
"grad_norm": 4.364723865759911,
"learning_rate": 9.906859945633999e-06,
"loss": 0.28743353,
"memory(GiB)": 100.16,
"step": 60,
"train_speed(iter/s)": 0.328247
},
{
"acc": 0.90578156,
"epoch": 0.4727272727272727,
"grad_norm": 3.243778418144708,
"learning_rate": 9.875610446603524e-06,
"loss": 0.26308877,
"memory(GiB)": 100.16,
"step": 65,
"train_speed(iter/s)": 0.330485
},
{
"acc": 0.89676228,
"epoch": 0.509090909090909,
"grad_norm": 3.4165598224968274,
"learning_rate": 9.83991274557109e-06,
"loss": 0.26372042,
"memory(GiB)": 127.96,
"step": 70,
"train_speed(iter/s)": 0.332413
},
{
"acc": 0.9054903,
"epoch": 0.5454545454545454,
"grad_norm": 3.814636181453338,
"learning_rate": 9.7997994142265e-06,
"loss": 0.25466361,
"memory(GiB)": 127.96,
"step": 75,
"train_speed(iter/s)": 0.334379
},
{
"acc": 0.90086946,
"epoch": 0.5818181818181818,
"grad_norm": 3.9972259822599243,
"learning_rate": 9.755307053217622e-06,
"loss": 0.27588401,
"memory(GiB)": 127.96,
"step": 80,
"train_speed(iter/s)": 0.336004
},
{
"acc": 0.89949837,
"epoch": 0.6181818181818182,
"grad_norm": 5.998240972031008,
"learning_rate": 9.706476258754834e-06,
"loss": 0.25472341,
"memory(GiB)": 127.96,
"step": 85,
"train_speed(iter/s)": 0.337291
},
{
"acc": 0.88558121,
"epoch": 0.6545454545454545,
"grad_norm": 2.7186082929792574,
"learning_rate": 9.653351585569786e-06,
"loss": 0.28254557,
"memory(GiB)": 127.96,
"step": 90,
"train_speed(iter/s)": 0.337576
},
{
"acc": 0.90562687,
"epoch": 0.6909090909090909,
"grad_norm": 1.6880555029124777,
"learning_rate": 9.595981506262264e-06,
"loss": 0.25460241,
"memory(GiB)": 127.96,
"step": 95,
"train_speed(iter/s)": 0.338319
},
{
"acc": 0.90238457,
"epoch": 0.7272727272727273,
"grad_norm": 1.824873702466673,
"learning_rate": 9.534418367072303e-06,
"loss": 0.25135682,
"memory(GiB)": 127.96,
"step": 100,
"train_speed(iter/s)": 0.33935
},
{
"acc": 0.90719824,
"epoch": 0.7636363636363637,
"grad_norm": 3.0523518026276926,
"learning_rate": 9.468718340117846e-06,
"loss": 0.23181794,
"memory(GiB)": 127.96,
"step": 105,
"train_speed(iter/s)": 0.340475
},
{
"acc": 0.89296656,
"epoch": 0.8,
"grad_norm": 3.6744833597367514,
"learning_rate": 9.398941372141562e-06,
"loss": 0.27924564,
"memory(GiB)": 127.96,
"step": 110,
"train_speed(iter/s)": 0.341456
},
{
"acc": 0.89754677,
"epoch": 0.8363636363636363,
"grad_norm": 3.250222318126925,
"learning_rate": 9.325151129813582e-06,
"loss": 0.26513102,
"memory(GiB)": 127.96,
"step": 115,
"train_speed(iter/s)": 0.342153
},
{
"acc": 0.88903837,
"epoch": 0.8727272727272727,
"grad_norm": 2.376728799007849,
"learning_rate": 9.247414941640045e-06,
"loss": 0.30169072,
"memory(GiB)": 133.76,
"step": 120,
"train_speed(iter/s)": 0.342998
},
{
"acc": 0.89329395,
"epoch": 0.9090909090909091,
"grad_norm": 4.889478322316845,
"learning_rate": 9.165803736530492e-06,
"loss": 0.28302565,
"memory(GiB)": 100.58,
"step": 125,
"train_speed(iter/s)": 0.343779
},
{
"acc": 0.89977417,
"epoch": 0.9454545454545454,
"grad_norm": 2.0057917841024633,
"learning_rate": 9.080391979080116e-06,
"loss": 0.2668047,
"memory(GiB)": 100.58,
"step": 130,
"train_speed(iter/s)": 0.344351
},
{
"acc": 0.90148487,
"epoch": 0.9818181818181818,
"grad_norm": 2.470715179920895,
"learning_rate": 8.991257601625973e-06,
"loss": 0.25751991,
"memory(GiB)": 100.58,
"step": 135,
"train_speed(iter/s)": 0.345171
},
{
"epoch": 1.0,
"eval_acc": 0.9078246620237608,
"eval_loss": 0.2361508309841156,
"eval_runtime": 10.278,
"eval_samples_per_second": 11.286,
"eval_steps_per_second": 1.459,
"step": 138
},
{
"acc": 0.8134038,
"epoch": 1.0145454545454546,
"grad_norm": 1.9385369249323439,
"learning_rate": 8.917324354080927e-06,
"loss": 0.254459,
"memory(GiB)": 100.58,
"step": 140,
"train_speed(iter/s)": 0.309598
},
{
"acc": 0.90728855,
"epoch": 1.050909090909091,
"grad_norm": 76.54794008048425,
"learning_rate": 8.82169644486897e-06,
"loss": 0.23623853,
"memory(GiB)": 100.58,
"step": 145,
"train_speed(iter/s)": 0.311044
},
{
"acc": 0.91997566,
"epoch": 1.0872727272727274,
"grad_norm": 1.727673298537959,
"learning_rate": 8.722581957483633e-06,
"loss": 0.21817675,
"memory(GiB)": 100.58,
"step": 150,
"train_speed(iter/s)": 0.31275
},
{
"acc": 0.91184559,
"epoch": 1.1236363636363635,
"grad_norm": 2.4370845690665974,
"learning_rate": 8.620071327057833e-06,
"loss": 0.22411692,
"memory(GiB)": 100.58,
"step": 155,
"train_speed(iter/s)": 0.314364
},
{
"acc": 0.91105995,
"epoch": 1.16,
"grad_norm": 4.474578962221848,
"learning_rate": 8.514258087470745e-06,
"loss": 0.22455444,
"memory(GiB)": 100.58,
"step": 160,
"train_speed(iter/s)": 0.315941
},
{
"acc": 0.92596989,
"epoch": 1.1963636363636363,
"grad_norm": 2.27714865436083,
"learning_rate": 8.405238786004592e-06,
"loss": 0.19618599,
"memory(GiB)": 100.58,
"step": 165,
"train_speed(iter/s)": 0.317423
},
{
"acc": 0.91807003,
"epoch": 1.2327272727272727,
"grad_norm": 3.476526282944283,
"learning_rate": 8.293112895251915e-06,
"loss": 0.21812358,
"memory(GiB)": 100.58,
"step": 170,
"train_speed(iter/s)": 0.318837
},
{
"acc": 0.91757879,
"epoch": 1.269090909090909,
"grad_norm": 2.812345046742586,
"learning_rate": 8.177982722353686e-06,
"loss": 0.20932765,
"memory(GiB)": 100.58,
"step": 175,
"train_speed(iter/s)": 0.319897
},
{
"acc": 0.9130724,
"epoch": 1.3054545454545454,
"grad_norm": 1.909403498812979,
"learning_rate": 8.059953315651102e-06,
"loss": 0.22100675,
"memory(GiB)": 100.58,
"step": 180,
"train_speed(iter/s)": 0.320821
},
{
"acc": 0.91083689,
"epoch": 1.3418181818181818,
"grad_norm": 3.7534483781265853,
"learning_rate": 7.93913236883622e-06,
"loss": 0.22075479,
"memory(GiB)": 100.58,
"step": 185,
"train_speed(iter/s)": 0.321724
},
{
"acc": 0.90749474,
"epoch": 1.3781818181818182,
"grad_norm": 3.0657460772043805,
"learning_rate": 7.815630122688893e-06,
"loss": 0.22630196,
"memory(GiB)": 100.58,
"step": 190,
"train_speed(iter/s)": 0.3226
},
{
"acc": 0.92584915,
"epoch": 1.4145454545454546,
"grad_norm": 5.821099128946982,
"learning_rate": 7.689559264489661e-06,
"loss": 0.21087196,
"memory(GiB)": 100.58,
"step": 195,
"train_speed(iter/s)": 0.32333
},
{
"acc": 0.90973835,
"epoch": 1.450909090909091,
"grad_norm": 1.830285233435649,
"learning_rate": 7.5610348252003814e-06,
"loss": 0.24081864,
"memory(GiB)": 100.58,
"step": 200,
"train_speed(iter/s)": 0.323755
},
{
"acc": 0.91908627,
"epoch": 1.4872727272727273,
"grad_norm": 3.46434543645635,
"learning_rate": 7.43017407450641e-06,
"loss": 0.21430855,
"memory(GiB)": 100.58,
"step": 205,
"train_speed(iter/s)": 0.324304
},
{
"acc": 0.90855217,
"epoch": 1.5236363636363637,
"grad_norm": 1.6445934060533671,
"learning_rate": 7.2970964138161006e-06,
"loss": 0.2204694,
"memory(GiB)": 100.58,
"step": 210,
"train_speed(iter/s)": 0.325137
},
{
"acc": 0.9202652,
"epoch": 1.56,
"grad_norm": 2.685739587728944,
"learning_rate": 7.161923267315262e-06,
"loss": 0.20784543,
"memory(GiB)": 100.58,
"step": 215,
"train_speed(iter/s)": 0.325877
},
{
"acc": 0.92430801,
"epoch": 1.5963636363636362,
"grad_norm": 3.4665236755524202,
"learning_rate": 7.0247779711759566e-06,
"loss": 0.2091445,
"memory(GiB)": 100.58,
"step": 220,
"train_speed(iter/s)": 0.326598
},
{
"acc": 0.91858587,
"epoch": 1.6327272727272728,
"grad_norm": 3.0400419237318674,
"learning_rate": 6.885785661020759e-06,
"loss": 0.22234173,
"memory(GiB)": 100.58,
"step": 225,
"train_speed(iter/s)": 0.32754
},
{
"acc": 0.91896229,
"epoch": 1.669090909090909,
"grad_norm": 2.50023791606214,
"learning_rate": 6.7450731577451255e-06,
"loss": 0.20558548,
"memory(GiB)": 100.58,
"step": 230,
"train_speed(iter/s)": 0.328407
},
{
"acc": 0.92307997,
"epoch": 1.7054545454545456,
"grad_norm": 2.789509587118081,
"learning_rate": 6.602768851802077e-06,
"loss": 0.21382501,
"memory(GiB)": 100.58,
"step": 235,
"train_speed(iter/s)": 0.329247
},
{
"acc": 0.91400127,
"epoch": 1.7418181818181817,
"grad_norm": 2.3889266426439173,
"learning_rate": 6.45900258605477e-06,
"loss": 0.21889751,
"memory(GiB)": 100.58,
"step": 240,
"train_speed(iter/s)": 0.330086
},
{
"acc": 0.90683708,
"epoch": 1.7781818181818183,
"grad_norm": 3.3107240552086465,
"learning_rate": 6.313905537303837e-06,
"loss": 0.21690502,
"memory(GiB)": 100.58,
"step": 245,
"train_speed(iter/s)": 0.330898
},
{
"acc": 0.91603336,
"epoch": 1.8145454545454545,
"grad_norm": 2.8852486239120547,
"learning_rate": 6.167610096597601e-06,
"loss": 0.2154119,
"memory(GiB)": 100.58,
"step": 250,
"train_speed(iter/s)": 0.331673
},
{
"acc": 0.91818409,
"epoch": 1.850909090909091,
"grad_norm": 2.0440810660323585,
"learning_rate": 6.020249748434384e-06,
"loss": 0.21951377,
"memory(GiB)": 100.58,
"step": 255,
"train_speed(iter/s)": 0.332356
},
{
"acc": 0.90970173,
"epoch": 1.8872727272727272,
"grad_norm": 3.8117037313040574,
"learning_rate": 5.871958948967106e-06,
"loss": 0.23594971,
"memory(GiB)": 100.58,
"step": 260,
"train_speed(iter/s)": 0.33293
},
{
"acc": 0.92123165,
"epoch": 1.9236363636363636,
"grad_norm": 3.4855685769436375,
"learning_rate": 5.722873003321322e-06,
"loss": 0.21117101,
"memory(GiB)": 100.58,
"step": 265,
"train_speed(iter/s)": 0.333662
},
{
"acc": 0.91777382,
"epoch": 1.96,
"grad_norm": 2.497000906964384,
"learning_rate": 5.573127942138622e-06,
"loss": 0.21624155,
"memory(GiB)": 100.58,
"step": 270,
"train_speed(iter/s)": 0.334225
},
{
"acc": 0.9166666,
"epoch": 1.9963636363636363,
"grad_norm": 4.782654736901845,
"learning_rate": 5.422860397458064e-06,
"loss": 0.21392875,
"memory(GiB)": 100.58,
"step": 275,
"train_speed(iter/s)": 0.334671
},
{
"epoch": 2.0,
"eval_acc": 0.9098730028676771,
"eval_loss": 0.2191523164510727,
"eval_runtime": 10.1618,
"eval_samples_per_second": 11.415,
"eval_steps_per_second": 1.476,
"step": 276
},
{
"acc": 0.84443541,
"epoch": 2.0290909090909093,
"grad_norm": 3.015403395241152,
"learning_rate": 5.27220747804885e-06,
"loss": 0.17099829,
"memory(GiB)": 100.58,
"step": 280,
"train_speed(iter/s)": 0.317633
},
{
"acc": 0.93253222,
"epoch": 2.0654545454545454,
"grad_norm": 2.167435558475328,
"learning_rate": 5.121306644308045e-06,
"loss": 0.18818057,
"memory(GiB)": 100.58,
"step": 285,
"train_speed(iter/s)": 0.3185
},
{
"acc": 0.94647446,
"epoch": 2.101818181818182,
"grad_norm": 2.1487311628542898,
"learning_rate": 4.9702955828374385e-06,
"loss": 0.15134431,
"memory(GiB)": 100.58,
"step": 290,
"train_speed(iter/s)": 0.319277
},
{
"acc": 0.93036728,
"epoch": 2.138181818181818,
"grad_norm": 4.174051904681519,
"learning_rate": 4.8193120808140185e-06,
"loss": 0.16832316,
"memory(GiB)": 100.58,
"step": 295,
"train_speed(iter/s)": 0.320077
},
{
"acc": 0.93621769,
"epoch": 2.174545454545455,
"grad_norm": 2.3866390406657896,
"learning_rate": 4.668493900268684e-06,
"loss": 0.16947901,
"memory(GiB)": 100.58,
"step": 300,
"train_speed(iter/s)": 0.320854
},
{
"acc": 0.93184824,
"epoch": 2.210909090909091,
"grad_norm": 2.7745369730901595,
"learning_rate": 4.517978652387882e-06,
"loss": 0.16975009,
"memory(GiB)": 100.58,
"step": 305,
"train_speed(iter/s)": 0.321626
},
{
"acc": 0.93711929,
"epoch": 2.247272727272727,
"grad_norm": 4.606104787695004,
"learning_rate": 4.367903671952906e-06,
"loss": 0.16885712,
"memory(GiB)": 100.58,
"step": 310,
"train_speed(iter/s)": 0.322203
},
{
"acc": 0.93099174,
"epoch": 2.2836363636363637,
"grad_norm": 8.944877147631175,
"learning_rate": 4.218405892031366e-06,
"loss": 0.17090337,
"memory(GiB)": 100.58,
"step": 315,
"train_speed(iter/s)": 0.322833
},
{
"acc": 0.93137035,
"epoch": 2.32,
"grad_norm": 4.336121777570645,
"learning_rate": 4.069621719035229e-06,
"loss": 0.1658249,
"memory(GiB)": 100.58,
"step": 320,
"train_speed(iter/s)": 0.323508
},
{
"acc": 0.9393259,
"epoch": 2.3563636363636364,
"grad_norm": 6.921537975970479,
"learning_rate": 3.921686908259354e-06,
"loss": 0.15576041,
"memory(GiB)": 100.58,
"step": 325,
"train_speed(iter/s)": 0.324182
},
{
"acc": 0.93962708,
"epoch": 2.3927272727272726,
"grad_norm": 3.5886891547630877,
"learning_rate": 3.7747364400141726e-06,
"loss": 0.16867373,
"memory(GiB)": 100.58,
"step": 330,
"train_speed(iter/s)": 0.324849
},
{
"acc": 0.93609505,
"epoch": 2.429090909090909,
"grad_norm": 2.686999433312404,
"learning_rate": 3.6289043964654526e-06,
"loss": 0.15810946,
"memory(GiB)": 100.58,
"step": 335,
"train_speed(iter/s)": 0.325493
},
{
"acc": 0.92649899,
"epoch": 2.4654545454545453,
"grad_norm": 2.591872854237207,
"learning_rate": 3.484323839293575e-06,
"loss": 0.17918372,
"memory(GiB)": 100.58,
"step": 340,
"train_speed(iter/s)": 0.326123
},
{
"acc": 0.93626881,
"epoch": 2.501818181818182,
"grad_norm": 2.5738296672570233,
"learning_rate": 3.341126688283922e-06,
"loss": 0.16855428,
"memory(GiB)": 100.58,
"step": 345,
"train_speed(iter/s)": 0.326743
},
{
"acc": 0.93825417,
"epoch": 2.538181818181818,
"grad_norm": 2.7529925608546466,
"learning_rate": 3.19944360095919e-06,
"loss": 0.16165339,
"memory(GiB)": 100.58,
"step": 350,
"train_speed(iter/s)": 0.327363
},
{
"acc": 0.94702225,
"epoch": 2.5745454545454547,
"grad_norm": 2.9545927202945315,
"learning_rate": 3.059403853363393e-06,
"loss": 0.14523516,
"memory(GiB)": 100.58,
"step": 355,
"train_speed(iter/s)": 0.327926
},
{
"acc": 0.94346981,
"epoch": 2.610909090909091,
"grad_norm": 4.047109124196383,
"learning_rate": 2.9211352221063987e-06,
"loss": 0.14715908,
"memory(GiB)": 100.58,
"step": 360,
"train_speed(iter/s)": 0.328285
},
{
"acc": 0.94318542,
"epoch": 2.6472727272727274,
"grad_norm": 2.3923230638690143,
"learning_rate": 2.7847638677765936e-06,
"loss": 0.1494684,
"memory(GiB)": 100.58,
"step": 365,
"train_speed(iter/s)": 0.328722
},
{
"acc": 0.95623245,
"epoch": 2.6836363636363636,
"grad_norm": 2.457260493406828,
"learning_rate": 2.650414219828032e-06,
"loss": 0.11759402,
"memory(GiB)": 100.58,
"step": 370,
"train_speed(iter/s)": 0.329264
},
{
"acc": 0.94435921,
"epoch": 2.7199999999999998,
"grad_norm": 1.5322367904545142,
"learning_rate": 2.5182088630471517e-06,
"loss": 0.13577256,
"memory(GiB)": 100.58,
"step": 375,
"train_speed(iter/s)": 0.329788
},
{
"acc": 0.94585953,
"epoch": 2.7563636363636363,
"grad_norm": 2.8650025435958666,
"learning_rate": 2.388268425702614e-06,
"loss": 0.14076474,
"memory(GiB)": 100.58,
"step": 380,
"train_speed(iter/s)": 0.330302
},
{
"acc": 0.9413455,
"epoch": 2.792727272727273,
"grad_norm": 4.510750432829035,
"learning_rate": 2.2607114694803263e-06,
"loss": 0.1642381,
"memory(GiB)": 100.58,
"step": 385,
"train_speed(iter/s)": 0.330731
},
{
"acc": 0.93006382,
"epoch": 2.829090909090909,
"grad_norm": 2.908591189518448,
"learning_rate": 2.1356543813040863e-06,
"loss": 0.17094066,
"memory(GiB)": 100.58,
"step": 390,
"train_speed(iter/s)": 0.331119
},
{
"acc": 0.94227448,
"epoch": 2.8654545454545453,
"grad_norm": 2.331626905910975,
"learning_rate": 2.0132112671405244e-06,
"loss": 0.14904225,
"memory(GiB)": 100.58,
"step": 395,
"train_speed(iter/s)": 0.331532
},
{
"acc": 0.93090382,
"epoch": 2.901818181818182,
"grad_norm": 4.223665768837086,
"learning_rate": 1.8934938478853108e-06,
"loss": 0.17768097,
"memory(GiB)": 100.58,
"step": 400,
"train_speed(iter/s)": 0.331963
},
{
"acc": 0.93722563,
"epoch": 2.9381818181818184,
"grad_norm": 2.7247775486261734,
"learning_rate": 1.7766113574255145e-06,
"loss": 0.15059752,
"memory(GiB)": 100.58,
"step": 405,
"train_speed(iter/s)": 0.332266
},
{
"acc": 0.94374504,
"epoch": 2.9745454545454546,
"grad_norm": 2.9951618135706055,
"learning_rate": 1.6626704429712411e-06,
"loss": 0.14953468,
"memory(GiB)": 100.58,
"step": 410,
"train_speed(iter/s)": 0.332599
},
{
"epoch": 3.0,
"eval_acc": 0.9192953707496927,
"eval_loss": 0.21225064992904663,
"eval_runtime": 9.5239,
"eval_samples_per_second": 12.18,
"eval_steps_per_second": 1.575,
"step": 414
}
],
"logging_steps": 5,
"max_steps": 548,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.931788793840435e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}