|
{ |
|
"best_metric": 0.21225065, |
|
"best_model_checkpoint": "/workspace/output/molmo-7b-d/v1-20250103-233013/checkpoint-414", |
|
"epoch": 3.0, |
|
"eval_steps": 200, |
|
"global_step": 414, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.93268561, |
|
"epoch": 0.007272727272727273, |
|
"grad_norm": 4.360905168604235, |
|
"learning_rate": 0.0, |
|
"loss": 0.15919656, |
|
"memory(GiB)": 131.1, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.051814 |
|
}, |
|
{ |
|
"acc": 0.93592656, |
|
"epoch": 0.03636363636363636, |
|
"grad_norm": 5.722024176509264, |
|
"learning_rate": 4.829949384917788e-06, |
|
"loss": 0.16946605, |
|
"memory(GiB)": 131.7, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.164947 |
|
}, |
|
{ |
|
"acc": 0.92745094, |
|
"epoch": 0.07272727272727272, |
|
"grad_norm": 5.449760109713864, |
|
"learning_rate": 6.910095361682884e-06, |
|
"loss": 0.19423571, |
|
"memory(GiB)": 131.7, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.228063 |
|
}, |
|
{ |
|
"acc": 0.9191308, |
|
"epoch": 0.10909090909090909, |
|
"grad_norm": 5.591882854054257, |
|
"learning_rate": 8.126902754116446e-06, |
|
"loss": 0.2132081, |
|
"memory(GiB)": 131.7, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.262462 |
|
}, |
|
{ |
|
"acc": 0.91665707, |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 8.643333147328232, |
|
"learning_rate": 8.990241338447979e-06, |
|
"loss": 0.2489913, |
|
"memory(GiB)": 132.85, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.283686 |
|
}, |
|
{ |
|
"acc": 0.92767944, |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 5.521300358752013, |
|
"learning_rate": 9.659898769835576e-06, |
|
"loss": 0.20275159, |
|
"memory(GiB)": 100.16, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.297772 |
|
}, |
|
{ |
|
"acc": 0.91237392, |
|
"epoch": 0.21818181818181817, |
|
"grad_norm": 4.691287833576054, |
|
"learning_rate": 9.999635040777627e-06, |
|
"loss": 0.24152677, |
|
"memory(GiB)": 100.16, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.306552 |
|
}, |
|
{ |
|
"acc": 0.89811802, |
|
"epoch": 0.2545454545454545, |
|
"grad_norm": 2.3267326177072176, |
|
"learning_rate": 9.995529861428146e-06, |
|
"loss": 0.2682821, |
|
"memory(GiB)": 100.16, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.312152 |
|
}, |
|
{ |
|
"acc": 0.91184635, |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 3.241573273096398, |
|
"learning_rate": 9.986867061882612e-06, |
|
"loss": 0.23578806, |
|
"memory(GiB)": 100.16, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.314978 |
|
}, |
|
{ |
|
"acc": 0.9012291, |
|
"epoch": 0.32727272727272727, |
|
"grad_norm": 2.3552152207973713, |
|
"learning_rate": 9.973654546348053e-06, |
|
"loss": 0.25761139, |
|
"memory(GiB)": 100.16, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.319208 |
|
}, |
|
{ |
|
"acc": 0.9016325, |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 3.1153181076119703, |
|
"learning_rate": 9.955904370333514e-06, |
|
"loss": 0.24715631, |
|
"memory(GiB)": 100.16, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.32296 |
|
}, |
|
{ |
|
"acc": 0.89749699, |
|
"epoch": 0.4, |
|
"grad_norm": 2.4498466601081943, |
|
"learning_rate": 9.933632729650212e-06, |
|
"loss": 0.25689688, |
|
"memory(GiB)": 100.16, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.325846 |
|
}, |
|
{ |
|
"acc": 0.88724833, |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 4.364723865759911, |
|
"learning_rate": 9.906859945633999e-06, |
|
"loss": 0.28743353, |
|
"memory(GiB)": 100.16, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.328247 |
|
}, |
|
{ |
|
"acc": 0.90578156, |
|
"epoch": 0.4727272727272727, |
|
"grad_norm": 3.243778418144708, |
|
"learning_rate": 9.875610446603524e-06, |
|
"loss": 0.26308877, |
|
"memory(GiB)": 100.16, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.330485 |
|
}, |
|
{ |
|
"acc": 0.89676228, |
|
"epoch": 0.509090909090909, |
|
"grad_norm": 3.4165598224968274, |
|
"learning_rate": 9.83991274557109e-06, |
|
"loss": 0.26372042, |
|
"memory(GiB)": 127.96, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.332413 |
|
}, |
|
{ |
|
"acc": 0.9054903, |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 3.814636181453338, |
|
"learning_rate": 9.7997994142265e-06, |
|
"loss": 0.25466361, |
|
"memory(GiB)": 127.96, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.334379 |
|
}, |
|
{ |
|
"acc": 0.90086946, |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 3.9972259822599243, |
|
"learning_rate": 9.755307053217622e-06, |
|
"loss": 0.27588401, |
|
"memory(GiB)": 127.96, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.336004 |
|
}, |
|
{ |
|
"acc": 0.89949837, |
|
"epoch": 0.6181818181818182, |
|
"grad_norm": 5.998240972031008, |
|
"learning_rate": 9.706476258754834e-06, |
|
"loss": 0.25472341, |
|
"memory(GiB)": 127.96, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.337291 |
|
}, |
|
{ |
|
"acc": 0.88558121, |
|
"epoch": 0.6545454545454545, |
|
"grad_norm": 2.7186082929792574, |
|
"learning_rate": 9.653351585569786e-06, |
|
"loss": 0.28254557, |
|
"memory(GiB)": 127.96, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.337576 |
|
}, |
|
{ |
|
"acc": 0.90562687, |
|
"epoch": 0.6909090909090909, |
|
"grad_norm": 1.6880555029124777, |
|
"learning_rate": 9.595981506262264e-06, |
|
"loss": 0.25460241, |
|
"memory(GiB)": 127.96, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.338319 |
|
}, |
|
{ |
|
"acc": 0.90238457, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 1.824873702466673, |
|
"learning_rate": 9.534418367072303e-06, |
|
"loss": 0.25135682, |
|
"memory(GiB)": 127.96, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.33935 |
|
}, |
|
{ |
|
"acc": 0.90719824, |
|
"epoch": 0.7636363636363637, |
|
"grad_norm": 3.0523518026276926, |
|
"learning_rate": 9.468718340117846e-06, |
|
"loss": 0.23181794, |
|
"memory(GiB)": 127.96, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.340475 |
|
}, |
|
{ |
|
"acc": 0.89296656, |
|
"epoch": 0.8, |
|
"grad_norm": 3.6744833597367514, |
|
"learning_rate": 9.398941372141562e-06, |
|
"loss": 0.27924564, |
|
"memory(GiB)": 127.96, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.341456 |
|
}, |
|
{ |
|
"acc": 0.89754677, |
|
"epoch": 0.8363636363636363, |
|
"grad_norm": 3.250222318126925, |
|
"learning_rate": 9.325151129813582e-06, |
|
"loss": 0.26513102, |
|
"memory(GiB)": 127.96, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.342153 |
|
}, |
|
{ |
|
"acc": 0.88903837, |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 2.376728799007849, |
|
"learning_rate": 9.247414941640045e-06, |
|
"loss": 0.30169072, |
|
"memory(GiB)": 133.76, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.342998 |
|
}, |
|
{ |
|
"acc": 0.89329395, |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 4.889478322316845, |
|
"learning_rate": 9.165803736530492e-06, |
|
"loss": 0.28302565, |
|
"memory(GiB)": 100.58, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.343779 |
|
}, |
|
{ |
|
"acc": 0.89977417, |
|
"epoch": 0.9454545454545454, |
|
"grad_norm": 2.0057917841024633, |
|
"learning_rate": 9.080391979080116e-06, |
|
"loss": 0.2668047, |
|
"memory(GiB)": 100.58, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.344351 |
|
}, |
|
{ |
|
"acc": 0.90148487, |
|
"epoch": 0.9818181818181818, |
|
"grad_norm": 2.470715179920895, |
|
"learning_rate": 8.991257601625973e-06, |
|
"loss": 0.25751991, |
|
"memory(GiB)": 100.58, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.345171 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc": 0.9078246620237608, |
|
"eval_loss": 0.2361508309841156, |
|
"eval_runtime": 10.278, |
|
"eval_samples_per_second": 11.286, |
|
"eval_steps_per_second": 1.459, |
|
"step": 138 |
|
}, |
|
{ |
|
"acc": 0.8134038, |
|
"epoch": 1.0145454545454546, |
|
"grad_norm": 1.9385369249323439, |
|
"learning_rate": 8.917324354080927e-06, |
|
"loss": 0.254459, |
|
"memory(GiB)": 100.58, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.309598 |
|
}, |
|
{ |
|
"acc": 0.90728855, |
|
"epoch": 1.050909090909091, |
|
"grad_norm": 76.54794008048425, |
|
"learning_rate": 8.82169644486897e-06, |
|
"loss": 0.23623853, |
|
"memory(GiB)": 100.58, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.311044 |
|
}, |
|
{ |
|
"acc": 0.91997566, |
|
"epoch": 1.0872727272727274, |
|
"grad_norm": 1.727673298537959, |
|
"learning_rate": 8.722581957483633e-06, |
|
"loss": 0.21817675, |
|
"memory(GiB)": 100.58, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.31275 |
|
}, |
|
{ |
|
"acc": 0.91184559, |
|
"epoch": 1.1236363636363635, |
|
"grad_norm": 2.4370845690665974, |
|
"learning_rate": 8.620071327057833e-06, |
|
"loss": 0.22411692, |
|
"memory(GiB)": 100.58, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.314364 |
|
}, |
|
{ |
|
"acc": 0.91105995, |
|
"epoch": 1.16, |
|
"grad_norm": 4.474578962221848, |
|
"learning_rate": 8.514258087470745e-06, |
|
"loss": 0.22455444, |
|
"memory(GiB)": 100.58, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.315941 |
|
}, |
|
{ |
|
"acc": 0.92596989, |
|
"epoch": 1.1963636363636363, |
|
"grad_norm": 2.27714865436083, |
|
"learning_rate": 8.405238786004592e-06, |
|
"loss": 0.19618599, |
|
"memory(GiB)": 100.58, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.317423 |
|
}, |
|
{ |
|
"acc": 0.91807003, |
|
"epoch": 1.2327272727272727, |
|
"grad_norm": 3.476526282944283, |
|
"learning_rate": 8.293112895251915e-06, |
|
"loss": 0.21812358, |
|
"memory(GiB)": 100.58, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.318837 |
|
}, |
|
{ |
|
"acc": 0.91757879, |
|
"epoch": 1.269090909090909, |
|
"grad_norm": 2.812345046742586, |
|
"learning_rate": 8.177982722353686e-06, |
|
"loss": 0.20932765, |
|
"memory(GiB)": 100.58, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.319897 |
|
}, |
|
{ |
|
"acc": 0.9130724, |
|
"epoch": 1.3054545454545454, |
|
"grad_norm": 1.909403498812979, |
|
"learning_rate": 8.059953315651102e-06, |
|
"loss": 0.22100675, |
|
"memory(GiB)": 100.58, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.320821 |
|
}, |
|
{ |
|
"acc": 0.91083689, |
|
"epoch": 1.3418181818181818, |
|
"grad_norm": 3.7534483781265853, |
|
"learning_rate": 7.93913236883622e-06, |
|
"loss": 0.22075479, |
|
"memory(GiB)": 100.58, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.321724 |
|
}, |
|
{ |
|
"acc": 0.90749474, |
|
"epoch": 1.3781818181818182, |
|
"grad_norm": 3.0657460772043805, |
|
"learning_rate": 7.815630122688893e-06, |
|
"loss": 0.22630196, |
|
"memory(GiB)": 100.58, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.3226 |
|
}, |
|
{ |
|
"acc": 0.92584915, |
|
"epoch": 1.4145454545454546, |
|
"grad_norm": 5.821099128946982, |
|
"learning_rate": 7.689559264489661e-06, |
|
"loss": 0.21087196, |
|
"memory(GiB)": 100.58, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.32333 |
|
}, |
|
{ |
|
"acc": 0.90973835, |
|
"epoch": 1.450909090909091, |
|
"grad_norm": 1.830285233435649, |
|
"learning_rate": 7.5610348252003814e-06, |
|
"loss": 0.24081864, |
|
"memory(GiB)": 100.58, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.323755 |
|
}, |
|
{ |
|
"acc": 0.91908627, |
|
"epoch": 1.4872727272727273, |
|
"grad_norm": 3.46434543645635, |
|
"learning_rate": 7.43017407450641e-06, |
|
"loss": 0.21430855, |
|
"memory(GiB)": 100.58, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.324304 |
|
}, |
|
{ |
|
"acc": 0.90855217, |
|
"epoch": 1.5236363636363637, |
|
"grad_norm": 1.6445934060533671, |
|
"learning_rate": 7.2970964138161006e-06, |
|
"loss": 0.2204694, |
|
"memory(GiB)": 100.58, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.325137 |
|
}, |
|
{ |
|
"acc": 0.9202652, |
|
"epoch": 1.56, |
|
"grad_norm": 2.685739587728944, |
|
"learning_rate": 7.161923267315262e-06, |
|
"loss": 0.20784543, |
|
"memory(GiB)": 100.58, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.325877 |
|
}, |
|
{ |
|
"acc": 0.92430801, |
|
"epoch": 1.5963636363636362, |
|
"grad_norm": 3.4665236755524202, |
|
"learning_rate": 7.0247779711759566e-06, |
|
"loss": 0.2091445, |
|
"memory(GiB)": 100.58, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.326598 |
|
}, |
|
{ |
|
"acc": 0.91858587, |
|
"epoch": 1.6327272727272728, |
|
"grad_norm": 3.0400419237318674, |
|
"learning_rate": 6.885785661020759e-06, |
|
"loss": 0.22234173, |
|
"memory(GiB)": 100.58, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.32754 |
|
}, |
|
{ |
|
"acc": 0.91896229, |
|
"epoch": 1.669090909090909, |
|
"grad_norm": 2.50023791606214, |
|
"learning_rate": 6.7450731577451255e-06, |
|
"loss": 0.20558548, |
|
"memory(GiB)": 100.58, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.328407 |
|
}, |
|
{ |
|
"acc": 0.92307997, |
|
"epoch": 1.7054545454545456, |
|
"grad_norm": 2.789509587118081, |
|
"learning_rate": 6.602768851802077e-06, |
|
"loss": 0.21382501, |
|
"memory(GiB)": 100.58, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.329247 |
|
}, |
|
{ |
|
"acc": 0.91400127, |
|
"epoch": 1.7418181818181817, |
|
"grad_norm": 2.3889266426439173, |
|
"learning_rate": 6.45900258605477e-06, |
|
"loss": 0.21889751, |
|
"memory(GiB)": 100.58, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.330086 |
|
}, |
|
{ |
|
"acc": 0.90683708, |
|
"epoch": 1.7781818181818183, |
|
"grad_norm": 3.3107240552086465, |
|
"learning_rate": 6.313905537303837e-06, |
|
"loss": 0.21690502, |
|
"memory(GiB)": 100.58, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.330898 |
|
}, |
|
{ |
|
"acc": 0.91603336, |
|
"epoch": 1.8145454545454545, |
|
"grad_norm": 2.8852486239120547, |
|
"learning_rate": 6.167610096597601e-06, |
|
"loss": 0.2154119, |
|
"memory(GiB)": 100.58, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.331673 |
|
}, |
|
{ |
|
"acc": 0.91818409, |
|
"epoch": 1.850909090909091, |
|
"grad_norm": 2.0440810660323585, |
|
"learning_rate": 6.020249748434384e-06, |
|
"loss": 0.21951377, |
|
"memory(GiB)": 100.58, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.332356 |
|
}, |
|
{ |
|
"acc": 0.90970173, |
|
"epoch": 1.8872727272727272, |
|
"grad_norm": 3.8117037313040574, |
|
"learning_rate": 5.871958948967106e-06, |
|
"loss": 0.23594971, |
|
"memory(GiB)": 100.58, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.33293 |
|
}, |
|
{ |
|
"acc": 0.92123165, |
|
"epoch": 1.9236363636363636, |
|
"grad_norm": 3.4855685769436375, |
|
"learning_rate": 5.722873003321322e-06, |
|
"loss": 0.21117101, |
|
"memory(GiB)": 100.58, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.333662 |
|
}, |
|
{ |
|
"acc": 0.91777382, |
|
"epoch": 1.96, |
|
"grad_norm": 2.497000906964384, |
|
"learning_rate": 5.573127942138622e-06, |
|
"loss": 0.21624155, |
|
"memory(GiB)": 100.58, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.334225 |
|
}, |
|
{ |
|
"acc": 0.9166666, |
|
"epoch": 1.9963636363636363, |
|
"grad_norm": 4.782654736901845, |
|
"learning_rate": 5.422860397458064e-06, |
|
"loss": 0.21392875, |
|
"memory(GiB)": 100.58, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.334671 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc": 0.9098730028676771, |
|
"eval_loss": 0.2191523164510727, |
|
"eval_runtime": 10.1618, |
|
"eval_samples_per_second": 11.415, |
|
"eval_steps_per_second": 1.476, |
|
"step": 276 |
|
}, |
|
{ |
|
"acc": 0.84443541, |
|
"epoch": 2.0290909090909093, |
|
"grad_norm": 3.015403395241152, |
|
"learning_rate": 5.27220747804885e-06, |
|
"loss": 0.17099829, |
|
"memory(GiB)": 100.58, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.317633 |
|
}, |
|
{ |
|
"acc": 0.93253222, |
|
"epoch": 2.0654545454545454, |
|
"grad_norm": 2.167435558475328, |
|
"learning_rate": 5.121306644308045e-06, |
|
"loss": 0.18818057, |
|
"memory(GiB)": 100.58, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.3185 |
|
}, |
|
{ |
|
"acc": 0.94647446, |
|
"epoch": 2.101818181818182, |
|
"grad_norm": 2.1487311628542898, |
|
"learning_rate": 4.9702955828374385e-06, |
|
"loss": 0.15134431, |
|
"memory(GiB)": 100.58, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.319277 |
|
}, |
|
{ |
|
"acc": 0.93036728, |
|
"epoch": 2.138181818181818, |
|
"grad_norm": 4.174051904681519, |
|
"learning_rate": 4.8193120808140185e-06, |
|
"loss": 0.16832316, |
|
"memory(GiB)": 100.58, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.320077 |
|
}, |
|
{ |
|
"acc": 0.93621769, |
|
"epoch": 2.174545454545455, |
|
"grad_norm": 2.3866390406657896, |
|
"learning_rate": 4.668493900268684e-06, |
|
"loss": 0.16947901, |
|
"memory(GiB)": 100.58, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.320854 |
|
}, |
|
{ |
|
"acc": 0.93184824, |
|
"epoch": 2.210909090909091, |
|
"grad_norm": 2.7745369730901595, |
|
"learning_rate": 4.517978652387882e-06, |
|
"loss": 0.16975009, |
|
"memory(GiB)": 100.58, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.321626 |
|
}, |
|
{ |
|
"acc": 0.93711929, |
|
"epoch": 2.247272727272727, |
|
"grad_norm": 4.606104787695004, |
|
"learning_rate": 4.367903671952906e-06, |
|
"loss": 0.16885712, |
|
"memory(GiB)": 100.58, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.322203 |
|
}, |
|
{ |
|
"acc": 0.93099174, |
|
"epoch": 2.2836363636363637, |
|
"grad_norm": 8.944877147631175, |
|
"learning_rate": 4.218405892031366e-06, |
|
"loss": 0.17090337, |
|
"memory(GiB)": 100.58, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.322833 |
|
}, |
|
{ |
|
"acc": 0.93137035, |
|
"epoch": 2.32, |
|
"grad_norm": 4.336121777570645, |
|
"learning_rate": 4.069621719035229e-06, |
|
"loss": 0.1658249, |
|
"memory(GiB)": 100.58, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.323508 |
|
}, |
|
{ |
|
"acc": 0.9393259, |
|
"epoch": 2.3563636363636364, |
|
"grad_norm": 6.921537975970479, |
|
"learning_rate": 3.921686908259354e-06, |
|
"loss": 0.15576041, |
|
"memory(GiB)": 100.58, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.324182 |
|
}, |
|
{ |
|
"acc": 0.93962708, |
|
"epoch": 2.3927272727272726, |
|
"grad_norm": 3.5886891547630877, |
|
"learning_rate": 3.7747364400141726e-06, |
|
"loss": 0.16867373, |
|
"memory(GiB)": 100.58, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.324849 |
|
}, |
|
{ |
|
"acc": 0.93609505, |
|
"epoch": 2.429090909090909, |
|
"grad_norm": 2.686999433312404, |
|
"learning_rate": 3.6289043964654526e-06, |
|
"loss": 0.15810946, |
|
"memory(GiB)": 100.58, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.325493 |
|
}, |
|
{ |
|
"acc": 0.92649899, |
|
"epoch": 2.4654545454545453, |
|
"grad_norm": 2.591872854237207, |
|
"learning_rate": 3.484323839293575e-06, |
|
"loss": 0.17918372, |
|
"memory(GiB)": 100.58, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.326123 |
|
}, |
|
{ |
|
"acc": 0.93626881, |
|
"epoch": 2.501818181818182, |
|
"grad_norm": 2.5738296672570233, |
|
"learning_rate": 3.341126688283922e-06, |
|
"loss": 0.16855428, |
|
"memory(GiB)": 100.58, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.326743 |
|
}, |
|
{ |
|
"acc": 0.93825417, |
|
"epoch": 2.538181818181818, |
|
"grad_norm": 2.7529925608546466, |
|
"learning_rate": 3.19944360095919e-06, |
|
"loss": 0.16165339, |
|
"memory(GiB)": 100.58, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.327363 |
|
}, |
|
{ |
|
"acc": 0.94702225, |
|
"epoch": 2.5745454545454547, |
|
"grad_norm": 2.9545927202945315, |
|
"learning_rate": 3.059403853363393e-06, |
|
"loss": 0.14523516, |
|
"memory(GiB)": 100.58, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.327926 |
|
}, |
|
{ |
|
"acc": 0.94346981, |
|
"epoch": 2.610909090909091, |
|
"grad_norm": 4.047109124196383, |
|
"learning_rate": 2.9211352221063987e-06, |
|
"loss": 0.14715908, |
|
"memory(GiB)": 100.58, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.328285 |
|
}, |
|
{ |
|
"acc": 0.94318542, |
|
"epoch": 2.6472727272727274, |
|
"grad_norm": 2.3923230638690143, |
|
"learning_rate": 2.7847638677765936e-06, |
|
"loss": 0.1494684, |
|
"memory(GiB)": 100.58, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.328722 |
|
}, |
|
{ |
|
"acc": 0.95623245, |
|
"epoch": 2.6836363636363636, |
|
"grad_norm": 2.457260493406828, |
|
"learning_rate": 2.650414219828032e-06, |
|
"loss": 0.11759402, |
|
"memory(GiB)": 100.58, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.329264 |
|
}, |
|
{ |
|
"acc": 0.94435921, |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 1.5322367904545142, |
|
"learning_rate": 2.5182088630471517e-06, |
|
"loss": 0.13577256, |
|
"memory(GiB)": 100.58, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.329788 |
|
}, |
|
{ |
|
"acc": 0.94585953, |
|
"epoch": 2.7563636363636363, |
|
"grad_norm": 2.8650025435958666, |
|
"learning_rate": 2.388268425702614e-06, |
|
"loss": 0.14076474, |
|
"memory(GiB)": 100.58, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.330302 |
|
}, |
|
{ |
|
"acc": 0.9413455, |
|
"epoch": 2.792727272727273, |
|
"grad_norm": 4.510750432829035, |
|
"learning_rate": 2.2607114694803263e-06, |
|
"loss": 0.1642381, |
|
"memory(GiB)": 100.58, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.330731 |
|
}, |
|
{ |
|
"acc": 0.93006382, |
|
"epoch": 2.829090909090909, |
|
"grad_norm": 2.908591189518448, |
|
"learning_rate": 2.1356543813040863e-06, |
|
"loss": 0.17094066, |
|
"memory(GiB)": 100.58, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.331119 |
|
}, |
|
{ |
|
"acc": 0.94227448, |
|
"epoch": 2.8654545454545453, |
|
"grad_norm": 2.331626905910975, |
|
"learning_rate": 2.0132112671405244e-06, |
|
"loss": 0.14904225, |
|
"memory(GiB)": 100.58, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.331532 |
|
}, |
|
{ |
|
"acc": 0.93090382, |
|
"epoch": 2.901818181818182, |
|
"grad_norm": 4.223665768837086, |
|
"learning_rate": 1.8934938478853108e-06, |
|
"loss": 0.17768097, |
|
"memory(GiB)": 100.58, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.331963 |
|
}, |
|
{ |
|
"acc": 0.93722563, |
|
"epoch": 2.9381818181818184, |
|
"grad_norm": 2.7247775486261734, |
|
"learning_rate": 1.7766113574255145e-06, |
|
"loss": 0.15059752, |
|
"memory(GiB)": 100.58, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.332266 |
|
}, |
|
{ |
|
"acc": 0.94374504, |
|
"epoch": 2.9745454545454546, |
|
"grad_norm": 2.9951618135706055, |
|
"learning_rate": 1.6626704429712411e-06, |
|
"loss": 0.14953468, |
|
"memory(GiB)": 100.58, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.332599 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc": 0.9192953707496927, |
|
"eval_loss": 0.21225064992904663, |
|
"eval_runtime": 9.5239, |
|
"eval_samples_per_second": 12.18, |
|
"eval_steps_per_second": 1.575, |
|
"step": 414 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 548, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.931788793840435e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|