flan-dialogue-summary-checkpoint / trainer_state.json
truocpham's picture
upload flan dialogue summary checkpoint
d942770
raw
history blame
9.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 31150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 9.94649545211343e-06,
"loss": 4.4121,
"step": 500
},
{
"epoch": 0.32,
"learning_rate": 9.892990904226861e-06,
"loss": 0.1731,
"step": 1000
},
{
"epoch": 0.48,
"learning_rate": 9.83948635634029e-06,
"loss": 0.1161,
"step": 1500
},
{
"epoch": 0.64,
"learning_rate": 9.78598180845372e-06,
"loss": 0.1059,
"step": 2000
},
{
"epoch": 0.8,
"learning_rate": 9.73247726056715e-06,
"loss": 0.1011,
"step": 2500
},
{
"epoch": 0.96,
"learning_rate": 9.67897271268058e-06,
"loss": 0.1013,
"step": 3000
},
{
"epoch": 1.0,
"eval_loss": 0.08340005576610565,
"eval_runtime": 17.1185,
"eval_samples_per_second": 29.208,
"eval_steps_per_second": 7.302,
"step": 3115
},
{
"epoch": 1.12,
"learning_rate": 9.62546816479401e-06,
"loss": 0.0987,
"step": 3500
},
{
"epoch": 1.28,
"learning_rate": 9.571963616907438e-06,
"loss": 0.0946,
"step": 4000
},
{
"epoch": 1.44,
"learning_rate": 9.518459069020868e-06,
"loss": 0.0956,
"step": 4500
},
{
"epoch": 1.61,
"learning_rate": 9.464954521134298e-06,
"loss": 0.0984,
"step": 5000
},
{
"epoch": 1.77,
"learning_rate": 9.411449973247728e-06,
"loss": 0.0949,
"step": 5500
},
{
"epoch": 1.93,
"learning_rate": 9.357945425361158e-06,
"loss": 0.0929,
"step": 6000
},
{
"epoch": 2.0,
"eval_loss": 0.08016223460435867,
"eval_runtime": 17.1151,
"eval_samples_per_second": 29.214,
"eval_steps_per_second": 7.304,
"step": 6230
},
{
"epoch": 2.09,
"learning_rate": 9.304440877474586e-06,
"loss": 0.0909,
"step": 6500
},
{
"epoch": 2.25,
"learning_rate": 9.250936329588016e-06,
"loss": 0.0917,
"step": 7000
},
{
"epoch": 2.41,
"learning_rate": 9.197431781701446e-06,
"loss": 0.0913,
"step": 7500
},
{
"epoch": 2.57,
"learning_rate": 9.143927233814876e-06,
"loss": 0.0912,
"step": 8000
},
{
"epoch": 2.73,
"learning_rate": 9.090422685928304e-06,
"loss": 0.0904,
"step": 8500
},
{
"epoch": 2.89,
"learning_rate": 9.036918138041734e-06,
"loss": 0.0908,
"step": 9000
},
{
"epoch": 3.0,
"eval_loss": 0.0781576856970787,
"eval_runtime": 17.0736,
"eval_samples_per_second": 29.285,
"eval_steps_per_second": 7.321,
"step": 9345
},
{
"epoch": 3.05,
"learning_rate": 8.983413590155164e-06,
"loss": 0.089,
"step": 9500
},
{
"epoch": 3.21,
"learning_rate": 8.929909042268593e-06,
"loss": 0.0874,
"step": 10000
},
{
"epoch": 3.37,
"learning_rate": 8.876404494382023e-06,
"loss": 0.0868,
"step": 10500
},
{
"epoch": 3.53,
"learning_rate": 8.822899946495453e-06,
"loss": 0.0868,
"step": 11000
},
{
"epoch": 3.69,
"learning_rate": 8.769395398608883e-06,
"loss": 0.0877,
"step": 11500
},
{
"epoch": 3.85,
"learning_rate": 8.715890850722311e-06,
"loss": 0.0876,
"step": 12000
},
{
"epoch": 4.0,
"eval_loss": 0.07677410542964935,
"eval_runtime": 17.0783,
"eval_samples_per_second": 29.277,
"eval_steps_per_second": 7.319,
"step": 12460
},
{
"epoch": 4.01,
"learning_rate": 8.662386302835741e-06,
"loss": 0.0887,
"step": 12500
},
{
"epoch": 4.17,
"learning_rate": 8.608881754949171e-06,
"loss": 0.0848,
"step": 13000
},
{
"epoch": 4.33,
"learning_rate": 8.555377207062601e-06,
"loss": 0.0825,
"step": 13500
},
{
"epoch": 4.49,
"learning_rate": 8.501872659176031e-06,
"loss": 0.085,
"step": 14000
},
{
"epoch": 4.65,
"learning_rate": 8.44836811128946e-06,
"loss": 0.0858,
"step": 14500
},
{
"epoch": 4.82,
"learning_rate": 8.39486356340289e-06,
"loss": 0.0855,
"step": 15000
},
{
"epoch": 4.98,
"learning_rate": 8.34135901551632e-06,
"loss": 0.0857,
"step": 15500
},
{
"epoch": 5.0,
"eval_loss": 0.07618943601846695,
"eval_runtime": 17.0787,
"eval_samples_per_second": 29.276,
"eval_steps_per_second": 7.319,
"step": 15575
},
{
"epoch": 5.14,
"learning_rate": 8.28785446762975e-06,
"loss": 0.0833,
"step": 16000
},
{
"epoch": 5.3,
"learning_rate": 8.234349919743178e-06,
"loss": 0.0819,
"step": 16500
},
{
"epoch": 5.46,
"learning_rate": 8.180845371856608e-06,
"loss": 0.0842,
"step": 17000
},
{
"epoch": 5.62,
"learning_rate": 8.127340823970038e-06,
"loss": 0.0814,
"step": 17500
},
{
"epoch": 5.78,
"learning_rate": 8.073836276083468e-06,
"loss": 0.0828,
"step": 18000
},
{
"epoch": 5.94,
"learning_rate": 8.020331728196898e-06,
"loss": 0.0847,
"step": 18500
},
{
"epoch": 6.0,
"eval_loss": 0.07566038519144058,
"eval_runtime": 17.0768,
"eval_samples_per_second": 29.28,
"eval_steps_per_second": 7.32,
"step": 18690
},
{
"epoch": 6.1,
"learning_rate": 7.966827180310326e-06,
"loss": 0.0826,
"step": 19000
},
{
"epoch": 6.26,
"learning_rate": 7.913322632423756e-06,
"loss": 0.0791,
"step": 19500
},
{
"epoch": 6.42,
"learning_rate": 7.859818084537186e-06,
"loss": 0.0809,
"step": 20000
},
{
"epoch": 6.58,
"learning_rate": 7.806313536650616e-06,
"loss": 0.083,
"step": 20500
},
{
"epoch": 6.74,
"learning_rate": 7.752808988764046e-06,
"loss": 0.0817,
"step": 21000
},
{
"epoch": 6.9,
"learning_rate": 7.699304440877475e-06,
"loss": 0.0815,
"step": 21500
},
{
"epoch": 7.0,
"eval_loss": 0.07512963563203812,
"eval_runtime": 17.0959,
"eval_samples_per_second": 29.247,
"eval_steps_per_second": 7.312,
"step": 21805
},
{
"epoch": 7.06,
"learning_rate": 7.645799892990905e-06,
"loss": 0.0812,
"step": 22000
},
{
"epoch": 7.22,
"learning_rate": 7.592295345104335e-06,
"loss": 0.0805,
"step": 22500
},
{
"epoch": 7.38,
"learning_rate": 7.538790797217765e-06,
"loss": 0.0808,
"step": 23000
},
{
"epoch": 7.54,
"learning_rate": 7.485286249331194e-06,
"loss": 0.0792,
"step": 23500
},
{
"epoch": 7.7,
"learning_rate": 7.431781701444624e-06,
"loss": 0.0792,
"step": 24000
},
{
"epoch": 7.87,
"learning_rate": 7.378277153558053e-06,
"loss": 0.0807,
"step": 24500
},
{
"epoch": 8.0,
"eval_loss": 0.07496295124292374,
"eval_runtime": 17.08,
"eval_samples_per_second": 29.274,
"eval_steps_per_second": 7.319,
"step": 24920
},
{
"epoch": 8.03,
"learning_rate": 7.324772605671483e-06,
"loss": 0.0784,
"step": 25000
},
{
"epoch": 8.19,
"learning_rate": 7.271268057784913e-06,
"loss": 0.0776,
"step": 25500
},
{
"epoch": 8.35,
"learning_rate": 7.217763509898342e-06,
"loss": 0.0764,
"step": 26000
},
{
"epoch": 8.51,
"learning_rate": 7.164258962011772e-06,
"loss": 0.0792,
"step": 26500
},
{
"epoch": 8.67,
"learning_rate": 7.110754414125201e-06,
"loss": 0.0802,
"step": 27000
},
{
"epoch": 8.83,
"learning_rate": 7.057249866238631e-06,
"loss": 0.0803,
"step": 27500
},
{
"epoch": 8.99,
"learning_rate": 7.003745318352061e-06,
"loss": 0.0779,
"step": 28000
},
{
"epoch": 9.0,
"eval_loss": 0.0747738629579544,
"eval_runtime": 17.0877,
"eval_samples_per_second": 29.261,
"eval_steps_per_second": 7.315,
"step": 28035
},
{
"epoch": 9.15,
"learning_rate": 6.950240770465491e-06,
"loss": 0.0765,
"step": 28500
},
{
"epoch": 9.31,
"learning_rate": 6.896736222578921e-06,
"loss": 0.0762,
"step": 29000
},
{
"epoch": 9.47,
"learning_rate": 6.84323167469235e-06,
"loss": 0.0777,
"step": 29500
},
{
"epoch": 9.63,
"learning_rate": 6.789727126805778e-06,
"loss": 0.078,
"step": 30000
},
{
"epoch": 9.79,
"learning_rate": 6.736222578919208e-06,
"loss": 0.0782,
"step": 30500
},
{
"epoch": 9.95,
"learning_rate": 6.682718031032638e-06,
"loss": 0.0757,
"step": 31000
},
{
"epoch": 10.0,
"eval_loss": 0.07480964064598083,
"eval_runtime": 17.0749,
"eval_samples_per_second": 29.283,
"eval_steps_per_second": 7.321,
"step": 31150
}
],
"max_steps": 93450,
"num_train_epochs": 30,
"total_flos": 8.53207661150208e+16,
"trial_name": null,
"trial_params": null
}