{
"best_metric": 0.21073441207408905,
"best_model_checkpoint": "ckpt/llama2_13b_fuze27_no_sys/object_counting_no_sys/checkpoint-400",
"epoch": 3.6842105263157894,
"eval_steps": 100,
"global_step": 700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05263157894736842,
"grad_norm": 6.26442289352417,
"learning_rate": 5e-05,
"loss": 10.645,
"step": 10
},
{
"epoch": 0.10526315789473684,
"grad_norm": 4.90908145904541,
"learning_rate": 0.0001,
"loss": 4.9002,
"step": 20
},
{
"epoch": 0.15789473684210525,
"grad_norm": 7.5457682609558105,
"learning_rate": 9.99714745464859e-05,
"loss": 0.9012,
"step": 30
},
{
"epoch": 0.21052631578947367,
"grad_norm": 9.139291763305664,
"learning_rate": 9.988593073400354e-05,
"loss": 0.7022,
"step": 40
},
{
"epoch": 0.2631578947368421,
"grad_norm": 5.00139856338501,
"learning_rate": 9.974346616959476e-05,
"loss": 0.7425,
"step": 50
},
{
"epoch": 0.3157894736842105,
"grad_norm": 6.002871990203857,
"learning_rate": 9.954424340791196e-05,
"loss": 0.7609,
"step": 60
},
{
"epoch": 0.3684210526315789,
"grad_norm": 6.101377010345459,
"learning_rate": 9.928848976574019e-05,
"loss": 0.6316,
"step": 70
},
{
"epoch": 0.42105263157894735,
"grad_norm": 5.7651824951171875,
"learning_rate": 9.897649706262473e-05,
"loss": 0.6406,
"step": 80
},
{
"epoch": 0.47368421052631576,
"grad_norm": 2.46134614944458,
"learning_rate": 9.860862128789953e-05,
"loss": 0.5597,
"step": 90
},
{
"epoch": 0.5263157894736842,
"grad_norm": 4.3927178382873535,
"learning_rate": 9.818528219449705e-05,
"loss": 0.477,
"step": 100
},
{
"epoch": 0.5263157894736842,
"eval_loss": 0.46219402551651,
"eval_runtime": 0.6936,
"eval_samples_per_second": 57.669,
"eval_steps_per_second": 14.417,
"step": 100
},
{
"epoch": 0.5789473684210527,
"grad_norm": 6.058330059051514,
"learning_rate": 9.770696282000244e-05,
"loss": 0.4798,
"step": 110
},
{
"epoch": 0.631578947368421,
"grad_norm": 4.410764217376709,
"learning_rate": 9.717420893549902e-05,
"loss": 0.5097,
"step": 120
},
{
"epoch": 0.6842105263157895,
"grad_norm": 1.7051620483398438,
"learning_rate": 9.658762842283343e-05,
"loss": 0.6507,
"step": 130
},
{
"epoch": 0.7368421052631579,
"grad_norm": 3.638519763946533,
"learning_rate": 9.594789058101153e-05,
"loss": 0.4961,
"step": 140
},
{
"epoch": 0.7894736842105263,
"grad_norm": 1.8080860376358032,
"learning_rate": 9.525572536251607e-05,
"loss": 0.3643,
"step": 150
},
{
"epoch": 0.8421052631578947,
"grad_norm": 4.843872547149658,
"learning_rate": 9.451192254041758e-05,
"loss": 0.5859,
"step": 160
},
{
"epoch": 0.8947368421052632,
"grad_norm": 7.292293548583984,
"learning_rate": 9.371733080722911e-05,
"loss": 0.4765,
"step": 170
},
{
"epoch": 0.9473684210526315,
"grad_norm": 2.1483707427978516,
"learning_rate": 9.287285680653254e-05,
"loss": 0.4496,
"step": 180
},
{
"epoch": 1.0,
"grad_norm": 1.2343636751174927,
"learning_rate": 9.197946409848194e-05,
"loss": 0.42,
"step": 190
},
{
"epoch": 1.0526315789473684,
"grad_norm": 3.902963638305664,
"learning_rate": 9.103817206036382e-05,
"loss": 0.4647,
"step": 200
},
{
"epoch": 1.0526315789473684,
"eval_loss": 0.2513052821159363,
"eval_runtime": 0.6934,
"eval_samples_per_second": 57.69,
"eval_steps_per_second": 14.423,
"step": 200
},
{
"epoch": 1.1052631578947367,
"grad_norm": 3.0811166763305664,
"learning_rate": 9.005005472346924e-05,
"loss": 0.3552,
"step": 210
},
{
"epoch": 1.1578947368421053,
"grad_norm": 12.97741413116455,
"learning_rate": 8.90162395476046e-05,
"loss": 0.5985,
"step": 220
},
{
"epoch": 1.2105263157894737,
"grad_norm": 4.255702495574951,
"learning_rate": 8.793790613463955e-05,
"loss": 0.6665,
"step": 230
},
{
"epoch": 1.263157894736842,
"grad_norm": 3.5546047687530518,
"learning_rate": 8.681628488255986e-05,
"loss": 0.4249,
"step": 240
},
{
"epoch": 1.3157894736842106,
"grad_norm": 9.395575523376465,
"learning_rate": 8.565265558156101e-05,
"loss": 0.4735,
"step": 250
},
{
"epoch": 1.368421052631579,
"grad_norm": 3.7864913940429688,
"learning_rate": 8.444834595378434e-05,
"loss": 0.265,
"step": 260
},
{
"epoch": 1.4210526315789473,
"grad_norm": 6.043397426605225,
"learning_rate": 8.320473013836196e-05,
"loss": 0.5925,
"step": 270
},
{
"epoch": 1.4736842105263157,
"grad_norm": 5.700586795806885,
"learning_rate": 8.192322712349917e-05,
"loss": 0.4528,
"step": 280
},
{
"epoch": 1.526315789473684,
"grad_norm": 2.9391582012176514,
"learning_rate": 8.060529912738315e-05,
"loss": 0.3558,
"step": 290
},
{
"epoch": 1.5789473684210527,
"grad_norm": 1.8771816492080688,
"learning_rate": 7.925244992976538e-05,
"loss": 0.5231,
"step": 300
},
{
"epoch": 1.5789473684210527,
"eval_loss": 0.34315773844718933,
"eval_runtime": 0.6944,
"eval_samples_per_second": 57.607,
"eval_steps_per_second": 14.402,
"step": 300
},
{
"epoch": 1.631578947368421,
"grad_norm": 4.8421478271484375,
"learning_rate": 7.786622315612183e-05,
"loss": 0.3751,
"step": 310
},
{
"epoch": 1.6842105263157894,
"grad_norm": 11.370196342468262,
"learning_rate": 7.644820051634812e-05,
"loss": 0.3844,
"step": 320
},
{
"epoch": 1.736842105263158,
"grad_norm": 3.433976173400879,
"learning_rate": 7.500000000000001e-05,
"loss": 0.5589,
"step": 330
},
{
"epoch": 1.7894736842105263,
"grad_norm": 6.323594570159912,
"learning_rate": 7.35232740301378e-05,
"loss": 0.2688,
"step": 340
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.5850329399108887,
"learning_rate": 7.201970757788172e-05,
"loss": 0.2528,
"step": 350
},
{
"epoch": 1.8947368421052633,
"grad_norm": 1.9679738283157349,
"learning_rate": 7.049101623982937e-05,
"loss": 0.2398,
"step": 360
},
{
"epoch": 1.9473684210526314,
"grad_norm": 12.53429889678955,
"learning_rate": 6.89389442805288e-05,
"loss": 0.4979,
"step": 370
},
{
"epoch": 2.0,
"grad_norm": 7.08478307723999,
"learning_rate": 6.736526264224101e-05,
"loss": 0.5478,
"step": 380
},
{
"epoch": 2.0526315789473686,
"grad_norm": 9.115711212158203,
"learning_rate": 6.577176692426279e-05,
"loss": 0.4545,
"step": 390
},
{
"epoch": 2.1052631578947367,
"grad_norm": 6.18236780166626,
"learning_rate": 6.416027533411519e-05,
"loss": 0.4381,
"step": 400
},
{
"epoch": 2.1052631578947367,
"eval_loss": 0.21073441207408905,
"eval_runtime": 0.6912,
"eval_samples_per_second": 57.873,
"eval_steps_per_second": 14.468,
"step": 400
},
{
"epoch": 2.1578947368421053,
"grad_norm": 1.294372797012329,
"learning_rate": 6.253262661293604e-05,
"loss": 0.2746,
"step": 410
},
{
"epoch": 2.2105263157894735,
"grad_norm": 9.00351619720459,
"learning_rate": 6.0890677937442574e-05,
"loss": 0.2276,
"step": 420
},
{
"epoch": 2.263157894736842,
"grad_norm": 10.986132621765137,
"learning_rate": 5.923630280085948e-05,
"loss": 0.2474,
"step": 430
},
{
"epoch": 2.3157894736842106,
"grad_norm": 12.219366073608398,
"learning_rate": 5.757138887522884e-05,
"loss": 0.2644,
"step": 440
},
{
"epoch": 2.3684210526315788,
"grad_norm": 2.5177173614501953,
"learning_rate": 5.5897835857542317e-05,
"loss": 0.3294,
"step": 450
},
{
"epoch": 2.4210526315789473,
"grad_norm": 6.946788787841797,
"learning_rate": 5.4217553302152237e-05,
"loss": 0.5553,
"step": 460
},
{
"epoch": 2.473684210526316,
"grad_norm": 3.0805630683898926,
"learning_rate": 5.2532458441935636e-05,
"loss": 0.1107,
"step": 470
},
{
"epoch": 2.526315789473684,
"grad_norm": 3.5907771587371826,
"learning_rate": 5.084447400069655e-05,
"loss": 0.2507,
"step": 480
},
{
"epoch": 2.5789473684210527,
"grad_norm": 1.5780644416809082,
"learning_rate": 4.915552599930345e-05,
"loss": 0.1839,
"step": 490
},
{
"epoch": 2.6315789473684212,
"grad_norm": 2.739733934402466,
"learning_rate": 4.746754155806437e-05,
"loss": 0.2159,
"step": 500
},
{
"epoch": 2.6315789473684212,
"eval_loss": 0.36658158898353577,
"eval_runtime": 0.6913,
"eval_samples_per_second": 57.864,
"eval_steps_per_second": 14.466,
"step": 500
},
{
"epoch": 2.6842105263157894,
"grad_norm": 1.3136796951293945,
"learning_rate": 4.578244669784777e-05,
"loss": 0.1742,
"step": 510
},
{
"epoch": 2.736842105263158,
"grad_norm": 0.0468708761036396,
"learning_rate": 4.410216414245771e-05,
"loss": 0.3915,
"step": 520
},
{
"epoch": 2.7894736842105265,
"grad_norm": 12.833720207214355,
"learning_rate": 4.2428611124771184e-05,
"loss": 0.2055,
"step": 530
},
{
"epoch": 2.8421052631578947,
"grad_norm": 0.10741530358791351,
"learning_rate": 4.076369719914055e-05,
"loss": 0.3395,
"step": 540
},
{
"epoch": 2.8947368421052633,
"grad_norm": 6.424005508422852,
"learning_rate": 3.9109322062557424e-05,
"loss": 0.2351,
"step": 550
},
{
"epoch": 2.9473684210526314,
"grad_norm": 7.805177211761475,
"learning_rate": 3.746737338706397e-05,
"loss": 0.1548,
"step": 560
},
{
"epoch": 3.0,
"grad_norm": 1.9612077474594116,
"learning_rate": 3.58397246658848e-05,
"loss": 0.1734,
"step": 570
},
{
"epoch": 3.0526315789473686,
"grad_norm": 6.020046234130859,
"learning_rate": 3.422823307573722e-05,
"loss": 0.1015,
"step": 580
},
{
"epoch": 3.1052631578947367,
"grad_norm": 5.7347307205200195,
"learning_rate": 3.263473735775899e-05,
"loss": 0.1705,
"step": 590
},
{
"epoch": 3.1578947368421053,
"grad_norm": 15.45874309539795,
"learning_rate": 3.10610557194712e-05,
"loss": 0.1934,
"step": 600
},
{
"epoch": 3.1578947368421053,
"eval_loss": 0.26252931356430054,
"eval_runtime": 0.6908,
"eval_samples_per_second": 57.906,
"eval_steps_per_second": 14.476,
"step": 600
},
{
"epoch": 3.2105263157894735,
"grad_norm": 1.443434715270996,
"learning_rate": 2.950898376017064e-05,
"loss": 0.1024,
"step": 610
},
{
"epoch": 3.263157894736842,
"grad_norm": 8.99201488494873,
"learning_rate": 2.798029242211828e-05,
"loss": 0.1786,
"step": 620
},
{
"epoch": 3.3157894736842106,
"grad_norm": 3.1811983585357666,
"learning_rate": 2.6476725969862227e-05,
"loss": 0.1907,
"step": 630
},
{
"epoch": 3.3684210526315788,
"grad_norm": 16.603193283081055,
"learning_rate": 2.500000000000001e-05,
"loss": 0.125,
"step": 640
},
{
"epoch": 3.4210526315789473,
"grad_norm": 6.346674919128418,
"learning_rate": 2.3551799483651894e-05,
"loss": 0.1509,
"step": 650
},
{
"epoch": 3.473684210526316,
"grad_norm": 3.2951841354370117,
"learning_rate": 2.2133776843878186e-05,
"loss": 0.1502,
"step": 660
},
{
"epoch": 3.526315789473684,
"grad_norm": 4.906289100646973,
"learning_rate": 2.074755007023461e-05,
"loss": 0.1458,
"step": 670
},
{
"epoch": 3.5789473684210527,
"grad_norm": 0.9275074005126953,
"learning_rate": 1.9394700872616855e-05,
"loss": 0.2181,
"step": 680
},
{
"epoch": 3.6315789473684212,
"grad_norm": 0.8629674315452576,
"learning_rate": 1.807677287650083e-05,
"loss": 0.0979,
"step": 690
},
{
"epoch": 3.6842105263157894,
"grad_norm": 6.139326095581055,
"learning_rate": 1.6795269861638042e-05,
"loss": 0.2701,
"step": 700
},
{
"epoch": 3.6842105263157894,
"eval_loss": 0.2853814661502838,
"eval_runtime": 0.691,
"eval_samples_per_second": 57.884,
"eval_steps_per_second": 14.471,
"step": 700
},
{
"epoch": 3.6842105263157894,
"step": 700,
"total_flos": 1.461153753759744e+16,
"train_loss": 0.5859067852156503,
"train_runtime": 192.1858,
"train_samples_per_second": 19.773,
"train_steps_per_second": 4.943
}
],
"logging_steps": 10,
"max_steps": 950,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"total_flos": 1.461153753759744e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}