{ "best_global_step": 5000, "best_metric": 0.7204041481018066, "best_model_checkpoint": "/content/drive/MyDrive/starcoder_models/c_model/checkpoint-5000", "epoch": 0.053127623176394335, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005312762317639433, "grad_norm": 1.1577584743499756, "learning_rate": 4.951e-05, "loss": 1.105, "step": 50 }, { "epoch": 0.0010625524635278867, "grad_norm": 1.4432220458984375, "learning_rate": 4.902e-05, "loss": 1.1468, "step": 100 }, { "epoch": 0.00159382869529183, "grad_norm": 1.7187693119049072, "learning_rate": 4.852e-05, "loss": 1.0859, "step": 150 }, { "epoch": 0.0021251049270557733, "grad_norm": 1.4333359003067017, "learning_rate": 4.8020000000000004e-05, "loss": 1.1362, "step": 200 }, { "epoch": 0.0026563811588197167, "grad_norm": 2.835648775100708, "learning_rate": 4.7520000000000006e-05, "loss": 1.0848, "step": 250 }, { "epoch": 0.00318765739058366, "grad_norm": 2.3364615440368652, "learning_rate": 4.702e-05, "loss": 1.0969, "step": 300 }, { "epoch": 0.0037189336223476034, "grad_norm": 1.2098877429962158, "learning_rate": 4.652e-05, "loss": 1.049, "step": 350 }, { "epoch": 0.004250209854111547, "grad_norm": 1.506690502166748, "learning_rate": 4.602e-05, "loss": 1.0709, "step": 400 }, { "epoch": 0.0047814860858754905, "grad_norm": 1.7528395652770996, "learning_rate": 4.5520000000000005e-05, "loss": 1.0942, "step": 450 }, { "epoch": 0.005312762317639433, "grad_norm": 1.2501164674758911, "learning_rate": 4.502e-05, "loss": 1.0664, "step": 500 }, { "epoch": 0.005844038549403377, "grad_norm": 2.9401023387908936, "learning_rate": 4.452e-05, "loss": 1.0931, "step": 550 }, { "epoch": 0.00637531478116732, "grad_norm": 1.6167668104171753, "learning_rate": 4.402e-05, "loss": 1.079, "step": 600 }, { "epoch": 0.006906591012931264, "grad_norm": 2.1370058059692383, "learning_rate": 4.352e-05, "loss": 1.0844, "step": 650 }, { "epoch": 0.007437867244695207, "grad_norm": 1.3238478899002075, "learning_rate": 4.3020000000000005e-05, "loss": 1.1036, "step": 700 }, { "epoch": 0.00796914347645915, "grad_norm": 2.37442684173584, "learning_rate": 4.2520000000000006e-05, "loss": 1.0563, "step": 750 }, { "epoch": 0.008500419708223093, "grad_norm": 1.9257761240005493, "learning_rate": 4.202e-05, "loss": 1.0835, "step": 800 }, { "epoch": 0.009031695939987037, "grad_norm": 1.042723298072815, "learning_rate": 4.152e-05, "loss": 1.0641, "step": 850 }, { "epoch": 0.009562972171750981, "grad_norm": 2.0961551666259766, "learning_rate": 4.1020000000000004e-05, "loss": 1.1171, "step": 900 }, { "epoch": 0.010094248403514923, "grad_norm": 1.5046427249908447, "learning_rate": 4.0520000000000005e-05, "loss": 1.0323, "step": 950 }, { "epoch": 0.010625524635278867, "grad_norm": 1.616687297821045, "learning_rate": 4.002e-05, "loss": 1.036, "step": 1000 }, { "epoch": 0.01115680086704281, "grad_norm": 1.4839075803756714, "learning_rate": 3.952e-05, "loss": 1.0063, "step": 1050 }, { "epoch": 0.011688077098806754, "grad_norm": 1.5016000270843506, "learning_rate": 3.902e-05, "loss": 1.0501, "step": 1100 }, { "epoch": 0.012219353330570696, "grad_norm": 1.5629721879959106, "learning_rate": 3.8520000000000004e-05, "loss": 1.0429, "step": 1150 }, { "epoch": 0.01275062956233464, "grad_norm": 1.5994470119476318, "learning_rate": 3.802e-05, "loss": 0.9918, "step": 1200 }, { "epoch": 0.013281905794098584, "grad_norm": 1.9713184833526611, "learning_rate": 3.752e-05, "loss": 1.0252, "step": 1250 }, { "epoch": 0.013813182025862528, "grad_norm": 1.1335642337799072, "learning_rate": 3.702e-05, "loss": 1.0221, "step": 1300 }, { "epoch": 0.01434445825762647, "grad_norm": 1.7937604188919067, "learning_rate": 3.652e-05, "loss": 0.9813, "step": 1350 }, { "epoch": 0.014875734489390413, "grad_norm": 2.357769250869751, "learning_rate": 3.6020000000000004e-05, "loss": 1.025, "step": 1400 }, { "epoch": 0.015407010721154357, "grad_norm": 1.2406364679336548, "learning_rate": 3.5520000000000006e-05, "loss": 1.0438, "step": 1450 }, { "epoch": 0.0159382869529183, "grad_norm": 1.7187669277191162, "learning_rate": 3.502e-05, "loss": 0.9935, "step": 1500 }, { "epoch": 0.016469563184682245, "grad_norm": 1.1326874494552612, "learning_rate": 3.452e-05, "loss": 1.0098, "step": 1550 }, { "epoch": 0.017000839416446187, "grad_norm": 1.6138919591903687, "learning_rate": 3.402e-05, "loss": 0.9101, "step": 1600 }, { "epoch": 0.01753211564821013, "grad_norm": 1.5561543703079224, "learning_rate": 3.3520000000000004e-05, "loss": 0.987, "step": 1650 }, { "epoch": 0.018063391879974074, "grad_norm": 1.1211045980453491, "learning_rate": 3.302e-05, "loss": 1.004, "step": 1700 }, { "epoch": 0.018594668111738016, "grad_norm": 1.3929895162582397, "learning_rate": 3.252e-05, "loss": 0.9983, "step": 1750 }, { "epoch": 0.019125944343501962, "grad_norm": 1.6703131198883057, "learning_rate": 3.202e-05, "loss": 1.0461, "step": 1800 }, { "epoch": 0.019657220575265904, "grad_norm": 1.2953699827194214, "learning_rate": 3.1519999999999996e-05, "loss": 0.9973, "step": 1850 }, { "epoch": 0.020188496807029846, "grad_norm": 1.9881635904312134, "learning_rate": 3.102e-05, "loss": 0.9938, "step": 1900 }, { "epoch": 0.02071977303879379, "grad_norm": 1.4676463603973389, "learning_rate": 3.0520000000000006e-05, "loss": 1.0152, "step": 1950 }, { "epoch": 0.021251049270557733, "grad_norm": 3.423586130142212, "learning_rate": 3.0030000000000002e-05, "loss": 0.9869, "step": 2000 }, { "epoch": 0.021782325502321676, "grad_norm": 1.1552212238311768, "learning_rate": 2.9530000000000004e-05, "loss": 0.984, "step": 2050 }, { "epoch": 0.02231360173408562, "grad_norm": 1.3363642692565918, "learning_rate": 2.903e-05, "loss": 1.0018, "step": 2100 }, { "epoch": 0.022844877965849563, "grad_norm": 2.301176071166992, "learning_rate": 2.853e-05, "loss": 1.0342, "step": 2150 }, { "epoch": 0.02337615419761351, "grad_norm": 1.5134526491165161, "learning_rate": 2.803e-05, "loss": 0.9905, "step": 2200 }, { "epoch": 0.02390743042937745, "grad_norm": 1.637014389038086, "learning_rate": 2.753e-05, "loss": 0.9626, "step": 2250 }, { "epoch": 0.024438706661141393, "grad_norm": 1.652897834777832, "learning_rate": 2.703e-05, "loss": 0.9651, "step": 2300 }, { "epoch": 0.024969982892905338, "grad_norm": 1.53103506565094, "learning_rate": 2.653e-05, "loss": 1.0151, "step": 2350 }, { "epoch": 0.02550125912466928, "grad_norm": 1.250005841255188, "learning_rate": 2.603e-05, "loss": 0.9915, "step": 2400 }, { "epoch": 0.026032535356433222, "grad_norm": 2.622339963912964, "learning_rate": 2.5530000000000005e-05, "loss": 0.9735, "step": 2450 }, { "epoch": 0.026563811588197168, "grad_norm": 1.132490634918213, "learning_rate": 2.5030000000000003e-05, "loss": 1.0087, "step": 2500 }, { "epoch": 0.02709508781996111, "grad_norm": 1.7631752490997314, "learning_rate": 2.453e-05, "loss": 1.0006, "step": 2550 }, { "epoch": 0.027626364051725055, "grad_norm": 1.8810368776321411, "learning_rate": 2.4030000000000002e-05, "loss": 0.9772, "step": 2600 }, { "epoch": 0.028157640283488997, "grad_norm": 1.5510205030441284, "learning_rate": 2.3530000000000003e-05, "loss": 0.9891, "step": 2650 }, { "epoch": 0.02868891651525294, "grad_norm": 1.3251287937164307, "learning_rate": 2.303e-05, "loss": 0.9555, "step": 2700 }, { "epoch": 0.029220192747016885, "grad_norm": 1.2131097316741943, "learning_rate": 2.253e-05, "loss": 0.9646, "step": 2750 }, { "epoch": 0.029751468978780827, "grad_norm": 1.2758469581604004, "learning_rate": 2.203e-05, "loss": 0.9523, "step": 2800 }, { "epoch": 0.03028274521054477, "grad_norm": 1.6871562004089355, "learning_rate": 2.153e-05, "loss": 0.9962, "step": 2850 }, { "epoch": 0.030814021442308714, "grad_norm": 1.491795301437378, "learning_rate": 2.103e-05, "loss": 0.9878, "step": 2900 }, { "epoch": 0.03134529767407266, "grad_norm": 1.0750707387924194, "learning_rate": 2.053e-05, "loss": 0.9533, "step": 2950 }, { "epoch": 0.0318765739058366, "grad_norm": 1.2901407480239868, "learning_rate": 2.0030000000000003e-05, "loss": 0.9761, "step": 3000 }, { "epoch": 0.032407850137600544, "grad_norm": 1.603750467300415, "learning_rate": 1.953e-05, "loss": 1.0043, "step": 3050 }, { "epoch": 0.03293912636936449, "grad_norm": 1.473386526107788, "learning_rate": 1.903e-05, "loss": 0.978, "step": 3100 }, { "epoch": 0.03347040260112843, "grad_norm": 2.137418031692505, "learning_rate": 1.853e-05, "loss": 0.9763, "step": 3150 }, { "epoch": 0.034001678832892374, "grad_norm": 1.3331339359283447, "learning_rate": 1.803e-05, "loss": 0.9884, "step": 3200 }, { "epoch": 0.03453295506465632, "grad_norm": 1.2832648754119873, "learning_rate": 1.7530000000000003e-05, "loss": 0.9611, "step": 3250 }, { "epoch": 0.03506423129642026, "grad_norm": 1.0518479347229004, "learning_rate": 1.703e-05, "loss": 0.9892, "step": 3300 }, { "epoch": 0.0355955075281842, "grad_norm": 1.670954704284668, "learning_rate": 1.6530000000000003e-05, "loss": 0.9773, "step": 3350 }, { "epoch": 0.03612678375994815, "grad_norm": 1.6390531063079834, "learning_rate": 1.603e-05, "loss": 0.9389, "step": 3400 }, { "epoch": 0.036658059991712094, "grad_norm": 1.219053864479065, "learning_rate": 1.553e-05, "loss": 0.9453, "step": 3450 }, { "epoch": 0.03718933622347603, "grad_norm": 0.8718635439872742, "learning_rate": 1.503e-05, "loss": 0.9377, "step": 3500 }, { "epoch": 0.03772061245523998, "grad_norm": 1.0964168310165405, "learning_rate": 1.4530000000000001e-05, "loss": 0.9374, "step": 3550 }, { "epoch": 0.038251888687003924, "grad_norm": 1.3705147504806519, "learning_rate": 1.4030000000000001e-05, "loss": 0.968, "step": 3600 }, { "epoch": 0.03878316491876786, "grad_norm": 1.5353392362594604, "learning_rate": 1.3530000000000001e-05, "loss": 0.9438, "step": 3650 }, { "epoch": 0.03931444115053181, "grad_norm": 1.454425573348999, "learning_rate": 1.303e-05, "loss": 0.9678, "step": 3700 }, { "epoch": 0.03984571738229575, "grad_norm": 1.4466837644577026, "learning_rate": 1.253e-05, "loss": 0.9692, "step": 3750 }, { "epoch": 0.04037699361405969, "grad_norm": 1.596469521522522, "learning_rate": 1.2030000000000002e-05, "loss": 0.9479, "step": 3800 }, { "epoch": 0.04090826984582364, "grad_norm": 1.5730984210968018, "learning_rate": 1.153e-05, "loss": 0.9996, "step": 3850 }, { "epoch": 0.04143954607758758, "grad_norm": 1.9027239084243774, "learning_rate": 1.103e-05, "loss": 0.9859, "step": 3900 }, { "epoch": 0.04197082230935152, "grad_norm": 1.7876182794570923, "learning_rate": 1.053e-05, "loss": 0.9636, "step": 3950 }, { "epoch": 0.04250209854111547, "grad_norm": 0.9582003355026245, "learning_rate": 1.003e-05, "loss": 0.9772, "step": 4000 }, { "epoch": 0.04303337477287941, "grad_norm": 1.0324100255966187, "learning_rate": 9.53e-06, "loss": 0.9721, "step": 4050 }, { "epoch": 0.04356465100464335, "grad_norm": 1.882957935333252, "learning_rate": 9.030000000000002e-06, "loss": 0.9492, "step": 4100 }, { "epoch": 0.0440959272364073, "grad_norm": 1.2507244348526, "learning_rate": 8.53e-06, "loss": 0.9866, "step": 4150 }, { "epoch": 0.04462720346817124, "grad_norm": 1.27763032913208, "learning_rate": 8.03e-06, "loss": 0.9253, "step": 4200 }, { "epoch": 0.04515847969993519, "grad_norm": 1.200188159942627, "learning_rate": 7.530000000000001e-06, "loss": 0.9409, "step": 4250 }, { "epoch": 0.045689755931699126, "grad_norm": 1.7713207006454468, "learning_rate": 7.0300000000000005e-06, "loss": 1.0014, "step": 4300 }, { "epoch": 0.04622103216346307, "grad_norm": 1.3375529050827026, "learning_rate": 6.53e-06, "loss": 0.9815, "step": 4350 }, { "epoch": 0.04675230839522702, "grad_norm": 1.4422866106033325, "learning_rate": 6.03e-06, "loss": 0.9578, "step": 4400 }, { "epoch": 0.047283584626990956, "grad_norm": 2.0105831623077393, "learning_rate": 5.54e-06, "loss": 0.965, "step": 4450 }, { "epoch": 0.0478148608587549, "grad_norm": 1.636265754699707, "learning_rate": 5.04e-06, "loss": 0.9343, "step": 4500 }, { "epoch": 0.04834613709051885, "grad_norm": 1.6128933429718018, "learning_rate": 4.540000000000001e-06, "loss": 1.0021, "step": 4550 }, { "epoch": 0.048877413322282785, "grad_norm": 1.1872644424438477, "learning_rate": 4.04e-06, "loss": 0.9459, "step": 4600 }, { "epoch": 0.04940868955404673, "grad_norm": 1.2646565437316895, "learning_rate": 3.5400000000000004e-06, "loss": 0.9423, "step": 4650 }, { "epoch": 0.049939965785810676, "grad_norm": 1.0135151147842407, "learning_rate": 3.04e-06, "loss": 0.9132, "step": 4700 }, { "epoch": 0.050471242017574615, "grad_norm": 1.7171257734298706, "learning_rate": 2.54e-06, "loss": 0.9368, "step": 4750 }, { "epoch": 0.05100251824933856, "grad_norm": 1.7923991680145264, "learning_rate": 2.0400000000000004e-06, "loss": 0.9543, "step": 4800 }, { "epoch": 0.051533794481102506, "grad_norm": 1.366851806640625, "learning_rate": 1.54e-06, "loss": 0.9597, "step": 4850 }, { "epoch": 0.052065070712866444, "grad_norm": 1.290432333946228, "learning_rate": 1.04e-06, "loss": 0.8798, "step": 4900 }, { "epoch": 0.05259634694463039, "grad_norm": 1.3311187028884888, "learning_rate": 5.4e-07, "loss": 0.9275, "step": 4950 }, { "epoch": 0.053127623176394335, "grad_norm": 1.2754595279693604, "learning_rate": 4e-08, "loss": 0.8968, "step": 5000 }, { "epoch": 0.053127623176394335, "eval_loss": 0.7204041481018066, "eval_runtime": 3441.0996, "eval_samples_per_second": 35.164, "eval_steps_per_second": 5.861, "step": 5000 } ], "logging_steps": 50, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.6578233851904e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }