lesso17's picture
Training in progress, step 25, checkpoint
ced155e verified
{
"best_metric": 1.6042733192443848,
"best_model_checkpoint": "miner_id_24/checkpoint-25",
"epoch": 0.0011519675605934938,
"eval_steps": 5,
"global_step": 25,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.607870242373975e-05,
"grad_norm": 0.588746190071106,
"learning_rate": 2.0000000000000003e-06,
"loss": 6.5064,
"step": 1
},
{
"epoch": 4.607870242373975e-05,
"eval_loss": 1.6421566009521484,
"eval_runtime": 4288.6025,
"eval_samples_per_second": 2.131,
"eval_steps_per_second": 1.065,
"step": 1
},
{
"epoch": 9.21574048474795e-05,
"grad_norm": 0.5073788166046143,
"learning_rate": 4.000000000000001e-06,
"loss": 5.3471,
"step": 2
},
{
"epoch": 0.00013823610727121925,
"grad_norm": 0.5721762180328369,
"learning_rate": 6e-06,
"loss": 6.3213,
"step": 3
},
{
"epoch": 0.000184314809694959,
"grad_norm": 0.5545344352722168,
"learning_rate": 8.000000000000001e-06,
"loss": 5.2701,
"step": 4
},
{
"epoch": 0.00023039351211869873,
"grad_norm": 0.5406827926635742,
"learning_rate": 1e-05,
"loss": 5.9878,
"step": 5
},
{
"epoch": 0.00023039351211869873,
"eval_loss": 1.642135500907898,
"eval_runtime": 1312.5288,
"eval_samples_per_second": 6.962,
"eval_steps_per_second": 3.481,
"step": 5
},
{
"epoch": 0.0002764722145424385,
"grad_norm": 0.5396299362182617,
"learning_rate": 1.2e-05,
"loss": 5.6497,
"step": 6
},
{
"epoch": 0.00032255091696617823,
"grad_norm": 0.47290557622909546,
"learning_rate": 1.4e-05,
"loss": 5.5207,
"step": 7
},
{
"epoch": 0.000368629619389918,
"grad_norm": 0.9972706437110901,
"learning_rate": 1.6000000000000003e-05,
"loss": 6.8762,
"step": 8
},
{
"epoch": 0.0004147083218136577,
"grad_norm": 0.5820791721343994,
"learning_rate": 1.8e-05,
"loss": 6.8037,
"step": 9
},
{
"epoch": 0.00046078702423739745,
"grad_norm": 0.5647141337394714,
"learning_rate": 2e-05,
"loss": 6.4019,
"step": 10
},
{
"epoch": 0.00046078702423739745,
"eval_loss": 1.638012170791626,
"eval_runtime": 1312.2279,
"eval_samples_per_second": 6.964,
"eval_steps_per_second": 3.482,
"step": 10
},
{
"epoch": 0.0005068657266611372,
"grad_norm": 1.0510690212249756,
"learning_rate": 1.9781476007338058e-05,
"loss": 8.4169,
"step": 11
},
{
"epoch": 0.000552944429084877,
"grad_norm": 0.5502700209617615,
"learning_rate": 1.913545457642601e-05,
"loss": 5.4723,
"step": 12
},
{
"epoch": 0.0005990231315086167,
"grad_norm": 1.5494643449783325,
"learning_rate": 1.8090169943749477e-05,
"loss": 7.7394,
"step": 13
},
{
"epoch": 0.0006451018339323565,
"grad_norm": 0.8573722243309021,
"learning_rate": 1.6691306063588583e-05,
"loss": 6.3292,
"step": 14
},
{
"epoch": 0.0006911805363560963,
"grad_norm": 0.9812471270561218,
"learning_rate": 1.5000000000000002e-05,
"loss": 6.437,
"step": 15
},
{
"epoch": 0.0006911805363560963,
"eval_loss": 1.6242995262145996,
"eval_runtime": 1312.4542,
"eval_samples_per_second": 6.963,
"eval_steps_per_second": 3.481,
"step": 15
},
{
"epoch": 0.000737259238779836,
"grad_norm": 1.0276433229446411,
"learning_rate": 1.3090169943749475e-05,
"loss": 6.6531,
"step": 16
},
{
"epoch": 0.0007833379412035757,
"grad_norm": 1.0183649063110352,
"learning_rate": 1.1045284632676535e-05,
"loss": 6.6236,
"step": 17
},
{
"epoch": 0.0008294166436273154,
"grad_norm": 1.4187095165252686,
"learning_rate": 8.954715367323468e-06,
"loss": 6.489,
"step": 18
},
{
"epoch": 0.0008754953460510552,
"grad_norm": 0.8043063282966614,
"learning_rate": 6.909830056250527e-06,
"loss": 6.186,
"step": 19
},
{
"epoch": 0.0009215740484747949,
"grad_norm": 1.7660255432128906,
"learning_rate": 5.000000000000003e-06,
"loss": 6.6414,
"step": 20
},
{
"epoch": 0.0009215740484747949,
"eval_loss": 1.608821153640747,
"eval_runtime": 3307.9824,
"eval_samples_per_second": 2.762,
"eval_steps_per_second": 1.381,
"step": 20
},
{
"epoch": 0.0009676527508985347,
"grad_norm": 1.247157096862793,
"learning_rate": 3.308693936411421e-06,
"loss": 5.8863,
"step": 21
},
{
"epoch": 0.0010137314533222744,
"grad_norm": 1.7679110765457153,
"learning_rate": 1.9098300562505266e-06,
"loss": 5.8775,
"step": 22
},
{
"epoch": 0.0010598101557460142,
"grad_norm": 1.592766284942627,
"learning_rate": 8.645454235739903e-07,
"loss": 5.9673,
"step": 23
},
{
"epoch": 0.001105888858169754,
"grad_norm": 1.4257538318634033,
"learning_rate": 2.1852399266194312e-07,
"loss": 6.3244,
"step": 24
},
{
"epoch": 0.0011519675605934938,
"grad_norm": 1.1082109212875366,
"learning_rate": 0.0,
"loss": 5.128,
"step": 25
},
{
"epoch": 0.0011519675605934938,
"eval_loss": 1.6042733192443848,
"eval_runtime": 1312.2555,
"eval_samples_per_second": 6.964,
"eval_steps_per_second": 3.482,
"step": 25
}
],
"logging_steps": 1,
"max_steps": 25,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.211080796798976e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}