{
"best_global_step": 5000,
"best_metric": 0.7204041481018066,
"best_model_checkpoint": "/content/drive/MyDrive/starcoder_models/c_model/checkpoint-5000",
"epoch": 0.053127623176394335,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005312762317639433,
"grad_norm": 1.1577584743499756,
"learning_rate": 4.951e-05,
"loss": 1.105,
"step": 50
},
{
"epoch": 0.0010625524635278867,
"grad_norm": 1.4432220458984375,
"learning_rate": 4.902e-05,
"loss": 1.1468,
"step": 100
},
{
"epoch": 0.00159382869529183,
"grad_norm": 1.7187693119049072,
"learning_rate": 4.852e-05,
"loss": 1.0859,
"step": 150
},
{
"epoch": 0.0021251049270557733,
"grad_norm": 1.4333359003067017,
"learning_rate": 4.8020000000000004e-05,
"loss": 1.1362,
"step": 200
},
{
"epoch": 0.0026563811588197167,
"grad_norm": 2.835648775100708,
"learning_rate": 4.7520000000000006e-05,
"loss": 1.0848,
"step": 250
},
{
"epoch": 0.00318765739058366,
"grad_norm": 2.3364615440368652,
"learning_rate": 4.702e-05,
"loss": 1.0969,
"step": 300
},
{
"epoch": 0.0037189336223476034,
"grad_norm": 1.2098877429962158,
"learning_rate": 4.652e-05,
"loss": 1.049,
"step": 350
},
{
"epoch": 0.004250209854111547,
"grad_norm": 1.506690502166748,
"learning_rate": 4.602e-05,
"loss": 1.0709,
"step": 400
},
{
"epoch": 0.0047814860858754905,
"grad_norm": 1.7528395652770996,
"learning_rate": 4.5520000000000005e-05,
"loss": 1.0942,
"step": 450
},
{
"epoch": 0.005312762317639433,
"grad_norm": 1.2501164674758911,
"learning_rate": 4.502e-05,
"loss": 1.0664,
"step": 500
},
{
"epoch": 0.005844038549403377,
"grad_norm": 2.9401023387908936,
"learning_rate": 4.452e-05,
"loss": 1.0931,
"step": 550
},
{
"epoch": 0.00637531478116732,
"grad_norm": 1.6167668104171753,
"learning_rate": 4.402e-05,
"loss": 1.079,
"step": 600
},
{
"epoch": 0.006906591012931264,
"grad_norm": 2.1370058059692383,
"learning_rate": 4.352e-05,
"loss": 1.0844,
"step": 650
},
{
"epoch": 0.007437867244695207,
"grad_norm": 1.3238478899002075,
"learning_rate": 4.3020000000000005e-05,
"loss": 1.1036,
"step": 700
},
{
"epoch": 0.00796914347645915,
"grad_norm": 2.37442684173584,
"learning_rate": 4.2520000000000006e-05,
"loss": 1.0563,
"step": 750
},
{
"epoch": 0.008500419708223093,
"grad_norm": 1.9257761240005493,
"learning_rate": 4.202e-05,
"loss": 1.0835,
"step": 800
},
{
"epoch": 0.009031695939987037,
"grad_norm": 1.042723298072815,
"learning_rate": 4.152e-05,
"loss": 1.0641,
"step": 850
},
{
"epoch": 0.009562972171750981,
"grad_norm": 2.0961551666259766,
"learning_rate": 4.1020000000000004e-05,
"loss": 1.1171,
"step": 900
},
{
"epoch": 0.010094248403514923,
"grad_norm": 1.5046427249908447,
"learning_rate": 4.0520000000000005e-05,
"loss": 1.0323,
"step": 950
},
{
"epoch": 0.010625524635278867,
"grad_norm": 1.616687297821045,
"learning_rate": 4.002e-05,
"loss": 1.036,
"step": 1000
},
{
"epoch": 0.01115680086704281,
"grad_norm": 1.4839075803756714,
"learning_rate": 3.952e-05,
"loss": 1.0063,
"step": 1050
},
{
"epoch": 0.011688077098806754,
"grad_norm": 1.5016000270843506,
"learning_rate": 3.902e-05,
"loss": 1.0501,
"step": 1100
},
{
"epoch": 0.012219353330570696,
"grad_norm": 1.5629721879959106,
"learning_rate": 3.8520000000000004e-05,
"loss": 1.0429,
"step": 1150
},
{
"epoch": 0.01275062956233464,
"grad_norm": 1.5994470119476318,
"learning_rate": 3.802e-05,
"loss": 0.9918,
"step": 1200
},
{
"epoch": 0.013281905794098584,
"grad_norm": 1.9713184833526611,
"learning_rate": 3.752e-05,
"loss": 1.0252,
"step": 1250
},
{
"epoch": 0.013813182025862528,
"grad_norm": 1.1335642337799072,
"learning_rate": 3.702e-05,
"loss": 1.0221,
"step": 1300
},
{
"epoch": 0.01434445825762647,
"grad_norm": 1.7937604188919067,
"learning_rate": 3.652e-05,
"loss": 0.9813,
"step": 1350
},
{
"epoch": 0.014875734489390413,
"grad_norm": 2.357769250869751,
"learning_rate": 3.6020000000000004e-05,
"loss": 1.025,
"step": 1400
},
{
"epoch": 0.015407010721154357,
"grad_norm": 1.2406364679336548,
"learning_rate": 3.5520000000000006e-05,
"loss": 1.0438,
"step": 1450
},
{
"epoch": 0.0159382869529183,
"grad_norm": 1.7187669277191162,
"learning_rate": 3.502e-05,
"loss": 0.9935,
"step": 1500
},
{
"epoch": 0.016469563184682245,
"grad_norm": 1.1326874494552612,
"learning_rate": 3.452e-05,
"loss": 1.0098,
"step": 1550
},
{
"epoch": 0.017000839416446187,
"grad_norm": 1.6138919591903687,
"learning_rate": 3.402e-05,
"loss": 0.9101,
"step": 1600
},
{
"epoch": 0.01753211564821013,
"grad_norm": 1.5561543703079224,
"learning_rate": 3.3520000000000004e-05,
"loss": 0.987,
"step": 1650
},
{
"epoch": 0.018063391879974074,
"grad_norm": 1.1211045980453491,
"learning_rate": 3.302e-05,
"loss": 1.004,
"step": 1700
},
{
"epoch": 0.018594668111738016,
"grad_norm": 1.3929895162582397,
"learning_rate": 3.252e-05,
"loss": 0.9983,
"step": 1750
},
{
"epoch": 0.019125944343501962,
"grad_norm": 1.6703131198883057,
"learning_rate": 3.202e-05,
"loss": 1.0461,
"step": 1800
},
{
"epoch": 0.019657220575265904,
"grad_norm": 1.2953699827194214,
"learning_rate": 3.1519999999999996e-05,
"loss": 0.9973,
"step": 1850
},
{
"epoch": 0.020188496807029846,
"grad_norm": 1.9881635904312134,
"learning_rate": 3.102e-05,
"loss": 0.9938,
"step": 1900
},
{
"epoch": 0.02071977303879379,
"grad_norm": 1.4676463603973389,
"learning_rate": 3.0520000000000006e-05,
"loss": 1.0152,
"step": 1950
},
{
"epoch": 0.021251049270557733,
"grad_norm": 3.423586130142212,
"learning_rate": 3.0030000000000002e-05,
"loss": 0.9869,
"step": 2000
},
{
"epoch": 0.021782325502321676,
"grad_norm": 1.1552212238311768,
"learning_rate": 2.9530000000000004e-05,
"loss": 0.984,
"step": 2050
},
{
"epoch": 0.02231360173408562,
"grad_norm": 1.3363642692565918,
"learning_rate": 2.903e-05,
"loss": 1.0018,
"step": 2100
},
{
"epoch": 0.022844877965849563,
"grad_norm": 2.301176071166992,
"learning_rate": 2.853e-05,
"loss": 1.0342,
"step": 2150
},
{
"epoch": 0.02337615419761351,
"grad_norm": 1.5134526491165161,
"learning_rate": 2.803e-05,
"loss": 0.9905,
"step": 2200
},
{
"epoch": 0.02390743042937745,
"grad_norm": 1.637014389038086,
"learning_rate": 2.753e-05,
"loss": 0.9626,
"step": 2250
},
{
"epoch": 0.024438706661141393,
"grad_norm": 1.652897834777832,
"learning_rate": 2.703e-05,
"loss": 0.9651,
"step": 2300
},
{
"epoch": 0.024969982892905338,
"grad_norm": 1.53103506565094,
"learning_rate": 2.653e-05,
"loss": 1.0151,
"step": 2350
},
{
"epoch": 0.02550125912466928,
"grad_norm": 1.250005841255188,
"learning_rate": 2.603e-05,
"loss": 0.9915,
"step": 2400
},
{
"epoch": 0.026032535356433222,
"grad_norm": 2.622339963912964,
"learning_rate": 2.5530000000000005e-05,
"loss": 0.9735,
"step": 2450
},
{
"epoch": 0.026563811588197168,
"grad_norm": 1.132490634918213,
"learning_rate": 2.5030000000000003e-05,
"loss": 1.0087,
"step": 2500
},
{
"epoch": 0.02709508781996111,
"grad_norm": 1.7631752490997314,
"learning_rate": 2.453e-05,
"loss": 1.0006,
"step": 2550
},
{
"epoch": 0.027626364051725055,
"grad_norm": 1.8810368776321411,
"learning_rate": 2.4030000000000002e-05,
"loss": 0.9772,
"step": 2600
},
{
"epoch": 0.028157640283488997,
"grad_norm": 1.5510205030441284,
"learning_rate": 2.3530000000000003e-05,
"loss": 0.9891,
"step": 2650
},
{
"epoch": 0.02868891651525294,
"grad_norm": 1.3251287937164307,
"learning_rate": 2.303e-05,
"loss": 0.9555,
"step": 2700
},
{
"epoch": 0.029220192747016885,
"grad_norm": 1.2131097316741943,
"learning_rate": 2.253e-05,
"loss": 0.9646,
"step": 2750
},
{
"epoch": 0.029751468978780827,
"grad_norm": 1.2758469581604004,
"learning_rate": 2.203e-05,
"loss": 0.9523,
"step": 2800
},
{
"epoch": 0.03028274521054477,
"grad_norm": 1.6871562004089355,
"learning_rate": 2.153e-05,
"loss": 0.9962,
"step": 2850
},
{
"epoch": 0.030814021442308714,
"grad_norm": 1.491795301437378,
"learning_rate": 2.103e-05,
"loss": 0.9878,
"step": 2900
},
{
"epoch": 0.03134529767407266,
"grad_norm": 1.0750707387924194,
"learning_rate": 2.053e-05,
"loss": 0.9533,
"step": 2950
},
{
"epoch": 0.0318765739058366,
"grad_norm": 1.2901407480239868,
"learning_rate": 2.0030000000000003e-05,
"loss": 0.9761,
"step": 3000
},
{
"epoch": 0.032407850137600544,
"grad_norm": 1.603750467300415,
"learning_rate": 1.953e-05,
"loss": 1.0043,
"step": 3050
},
{
"epoch": 0.03293912636936449,
"grad_norm": 1.473386526107788,
"learning_rate": 1.903e-05,
"loss": 0.978,
"step": 3100
},
{
"epoch": 0.03347040260112843,
"grad_norm": 2.137418031692505,
"learning_rate": 1.853e-05,
"loss": 0.9763,
"step": 3150
},
{
"epoch": 0.034001678832892374,
"grad_norm": 1.3331339359283447,
"learning_rate": 1.803e-05,
"loss": 0.9884,
"step": 3200
},
{
"epoch": 0.03453295506465632,
"grad_norm": 1.2832648754119873,
"learning_rate": 1.7530000000000003e-05,
"loss": 0.9611,
"step": 3250
},
{
"epoch": 0.03506423129642026,
"grad_norm": 1.0518479347229004,
"learning_rate": 1.703e-05,
"loss": 0.9892,
"step": 3300
},
{
"epoch": 0.0355955075281842,
"grad_norm": 1.670954704284668,
"learning_rate": 1.6530000000000003e-05,
"loss": 0.9773,
"step": 3350
},
{
"epoch": 0.03612678375994815,
"grad_norm": 1.6390531063079834,
"learning_rate": 1.603e-05,
"loss": 0.9389,
"step": 3400
},
{
"epoch": 0.036658059991712094,
"grad_norm": 1.219053864479065,
"learning_rate": 1.553e-05,
"loss": 0.9453,
"step": 3450
},
{
"epoch": 0.03718933622347603,
"grad_norm": 0.8718635439872742,
"learning_rate": 1.503e-05,
"loss": 0.9377,
"step": 3500
},
{
"epoch": 0.03772061245523998,
"grad_norm": 1.0964168310165405,
"learning_rate": 1.4530000000000001e-05,
"loss": 0.9374,
"step": 3550
},
{
"epoch": 0.038251888687003924,
"grad_norm": 1.3705147504806519,
"learning_rate": 1.4030000000000001e-05,
"loss": 0.968,
"step": 3600
},
{
"epoch": 0.03878316491876786,
"grad_norm": 1.5353392362594604,
"learning_rate": 1.3530000000000001e-05,
"loss": 0.9438,
"step": 3650
},
{
"epoch": 0.03931444115053181,
"grad_norm": 1.454425573348999,
"learning_rate": 1.303e-05,
"loss": 0.9678,
"step": 3700
},
{
"epoch": 0.03984571738229575,
"grad_norm": 1.4466837644577026,
"learning_rate": 1.253e-05,
"loss": 0.9692,
"step": 3750
},
{
"epoch": 0.04037699361405969,
"grad_norm": 1.596469521522522,
"learning_rate": 1.2030000000000002e-05,
"loss": 0.9479,
"step": 3800
},
{
"epoch": 0.04090826984582364,
"grad_norm": 1.5730984210968018,
"learning_rate": 1.153e-05,
"loss": 0.9996,
"step": 3850
},
{
"epoch": 0.04143954607758758,
"grad_norm": 1.9027239084243774,
"learning_rate": 1.103e-05,
"loss": 0.9859,
"step": 3900
},
{
"epoch": 0.04197082230935152,
"grad_norm": 1.7876182794570923,
"learning_rate": 1.053e-05,
"loss": 0.9636,
"step": 3950
},
{
"epoch": 0.04250209854111547,
"grad_norm": 0.9582003355026245,
"learning_rate": 1.003e-05,
"loss": 0.9772,
"step": 4000
},
{
"epoch": 0.04303337477287941,
"grad_norm": 1.0324100255966187,
"learning_rate": 9.53e-06,
"loss": 0.9721,
"step": 4050
},
{
"epoch": 0.04356465100464335,
"grad_norm": 1.882957935333252,
"learning_rate": 9.030000000000002e-06,
"loss": 0.9492,
"step": 4100
},
{
"epoch": 0.0440959272364073,
"grad_norm": 1.2507244348526,
"learning_rate": 8.53e-06,
"loss": 0.9866,
"step": 4150
},
{
"epoch": 0.04462720346817124,
"grad_norm": 1.27763032913208,
"learning_rate": 8.03e-06,
"loss": 0.9253,
"step": 4200
},
{
"epoch": 0.04515847969993519,
"grad_norm": 1.200188159942627,
"learning_rate": 7.530000000000001e-06,
"loss": 0.9409,
"step": 4250
},
{
"epoch": 0.045689755931699126,
"grad_norm": 1.7713207006454468,
"learning_rate": 7.0300000000000005e-06,
"loss": 1.0014,
"step": 4300
},
{
"epoch": 0.04622103216346307,
"grad_norm": 1.3375529050827026,
"learning_rate": 6.53e-06,
"loss": 0.9815,
"step": 4350
},
{
"epoch": 0.04675230839522702,
"grad_norm": 1.4422866106033325,
"learning_rate": 6.03e-06,
"loss": 0.9578,
"step": 4400
},
{
"epoch": 0.047283584626990956,
"grad_norm": 2.0105831623077393,
"learning_rate": 5.54e-06,
"loss": 0.965,
"step": 4450
},
{
"epoch": 0.0478148608587549,
"grad_norm": 1.636265754699707,
"learning_rate": 5.04e-06,
"loss": 0.9343,
"step": 4500
},
{
"epoch": 0.04834613709051885,
"grad_norm": 1.6128933429718018,
"learning_rate": 4.540000000000001e-06,
"loss": 1.0021,
"step": 4550
},
{
"epoch": 0.048877413322282785,
"grad_norm": 1.1872644424438477,
"learning_rate": 4.04e-06,
"loss": 0.9459,
"step": 4600
},
{
"epoch": 0.04940868955404673,
"grad_norm": 1.2646565437316895,
"learning_rate": 3.5400000000000004e-06,
"loss": 0.9423,
"step": 4650
},
{
"epoch": 0.049939965785810676,
"grad_norm": 1.0135151147842407,
"learning_rate": 3.04e-06,
"loss": 0.9132,
"step": 4700
},
{
"epoch": 0.050471242017574615,
"grad_norm": 1.7171257734298706,
"learning_rate": 2.54e-06,
"loss": 0.9368,
"step": 4750
},
{
"epoch": 0.05100251824933856,
"grad_norm": 1.7923991680145264,
"learning_rate": 2.0400000000000004e-06,
"loss": 0.9543,
"step": 4800
},
{
"epoch": 0.051533794481102506,
"grad_norm": 1.366851806640625,
"learning_rate": 1.54e-06,
"loss": 0.9597,
"step": 4850
},
{
"epoch": 0.052065070712866444,
"grad_norm": 1.290432333946228,
"learning_rate": 1.04e-06,
"loss": 0.8798,
"step": 4900
},
{
"epoch": 0.05259634694463039,
"grad_norm": 1.3311187028884888,
"learning_rate": 5.4e-07,
"loss": 0.9275,
"step": 4950
},
{
"epoch": 0.053127623176394335,
"grad_norm": 1.2754595279693604,
"learning_rate": 4e-08,
"loss": 0.8968,
"step": 5000
},
{
"epoch": 0.053127623176394335,
"eval_loss": 0.7204041481018066,
"eval_runtime": 3441.0996,
"eval_samples_per_second": 35.164,
"eval_steps_per_second": 5.861,
"step": 5000
}
],
"logging_steps": 50,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6578233851904e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}