{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "global_step": 6400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 1.5625e-06,
      "loss": 2.6055,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.125e-06,
      "loss": 2.6479,
      "step": 200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.6875000000000004e-06,
      "loss": 2.6039,
      "step": 300
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.25e-06,
      "loss": 2.6115,
      "step": 400
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.8125e-06,
      "loss": 2.5965,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.375000000000001e-06,
      "loss": 2.5992,
      "step": 600
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.09375e-05,
      "loss": 2.6191,
      "step": 700
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.25e-05,
      "loss": 2.5917,
      "step": 800
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.4062500000000001e-05,
      "loss": 2.5972,
      "step": 900
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.5625e-05,
      "loss": 2.5517,
      "step": 1000
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.71875e-05,
      "loss": 2.5635,
      "step": 1100
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 2.5634,
      "step": 1200
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.0312500000000002e-05,
      "loss": 2.554,
      "step": 1300
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.1875e-05,
      "loss": 2.559,
      "step": 1400
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.34375e-05,
      "loss": 2.5601,
      "step": 1500
    },
    {
      "epoch": 0.25,
      "learning_rate": 2.5e-05,
      "loss": 2.5275,
      "step": 1600
    },
    {
      "epoch": 0.27,
      "learning_rate": 2.6562500000000002e-05,
      "loss": 2.5367,
      "step": 1700
    },
    {
      "epoch": 0.28,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 2.5423,
      "step": 1800
    },
    {
      "epoch": 0.3,
      "learning_rate": 2.96875e-05,
      "loss": 2.4997,
      "step": 1900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.125e-05,
      "loss": 2.5041,
      "step": 2000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.2812500000000005e-05,
      "loss": 2.5048,
      "step": 2100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.4375e-05,
      "loss": 2.4889,
      "step": 2200
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.59375e-05,
      "loss": 2.507,
      "step": 2300
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.4832,
      "step": 2400
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.90625e-05,
      "loss": 2.4661,
      "step": 2500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 2.4704,
      "step": 2600
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.21875e-05,
      "loss": 2.4762,
      "step": 2700
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.375e-05,
      "loss": 2.4511,
      "step": 2800
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.5312500000000004e-05,
      "loss": 2.4563,
      "step": 2900
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.6875e-05,
      "loss": 2.4755,
      "step": 3000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.8437500000000005e-05,
      "loss": 2.483,
      "step": 3100
    },
    {
      "epoch": 0.5,
      "learning_rate": 5e-05,
      "loss": 2.4765,
      "step": 3200
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.15625e-05,
      "loss": 2.4527,
      "step": 3300
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.3125000000000004e-05,
      "loss": 2.467,
      "step": 3400
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.46875e-05,
      "loss": 2.4261,
      "step": 3500
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.6250000000000005e-05,
      "loss": 2.4484,
      "step": 3600
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.78125e-05,
      "loss": 2.428,
      "step": 3700
    },
    {
      "epoch": 0.59,
      "learning_rate": 5.9375e-05,
      "loss": 2.4456,
      "step": 3800
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.0937500000000004e-05,
      "loss": 2.4489,
      "step": 3900
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.25e-05,
      "loss": 2.4524,
      "step": 4000
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.40625e-05,
      "loss": 2.4575,
      "step": 4100
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.562500000000001e-05,
      "loss": 2.434,
      "step": 4200
    },
    {
      "epoch": 0.67,
      "learning_rate": 6.71875e-05,
      "loss": 2.4343,
      "step": 4300
    },
    {
      "epoch": 0.69,
      "learning_rate": 6.875e-05,
      "loss": 2.4481,
      "step": 4400
    },
    {
      "epoch": 0.7,
      "learning_rate": 7.031250000000001e-05,
      "loss": 2.4392,
      "step": 4500
    },
    {
      "epoch": 0.72,
      "learning_rate": 7.1875e-05,
      "loss": 2.4047,
      "step": 4600
    },
    {
      "epoch": 0.73,
      "learning_rate": 7.34375e-05,
      "loss": 2.4314,
      "step": 4700
    },
    {
      "epoch": 0.75,
      "learning_rate": 7.500000000000001e-05,
      "loss": 2.4137,
      "step": 4800
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.65625e-05,
      "loss": 2.4424,
      "step": 4900
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.8125e-05,
      "loss": 2.409,
      "step": 5000
    },
    {
      "epoch": 0.8,
      "learning_rate": 7.96875e-05,
      "loss": 2.4307,
      "step": 5100
    },
    {
      "epoch": 0.81,
      "learning_rate": 8.125000000000001e-05,
      "loss": 2.4222,
      "step": 5200
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.28125e-05,
      "loss": 2.4247,
      "step": 5300
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.4375e-05,
      "loss": 2.4073,
      "step": 5400
    },
    {
      "epoch": 0.86,
      "learning_rate": 8.593750000000001e-05,
      "loss": 2.4269,
      "step": 5500
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.75e-05,
      "loss": 2.4198,
      "step": 5600
    },
    {
      "epoch": 0.89,
      "learning_rate": 8.90625e-05,
      "loss": 2.436,
      "step": 5700
    },
    {
      "epoch": 0.91,
      "learning_rate": 9.062500000000001e-05,
      "loss": 2.4122,
      "step": 5800
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.21875e-05,
      "loss": 2.4047,
      "step": 5900
    },
    {
      "epoch": 0.94,
      "learning_rate": 9.375e-05,
      "loss": 2.4164,
      "step": 6000
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.53125e-05,
      "loss": 2.4175,
      "step": 6100
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.687500000000001e-05,
      "loss": 2.3834,
      "step": 6200
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.84375e-05,
      "loss": 2.4239,
      "step": 6300
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0001,
      "loss": 2.4192,
      "step": 6400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5911958376190094,
      "eval_loss": 2.2528364658355713,
      "eval_runtime": 50783.1906,
      "eval_samples_per_second": 6.456,
      "eval_steps_per_second": 0.807,
      "step": 6400
    }
  ],
  "max_steps": 6400,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 2.707934115004416e+17,
  "trial_name": null,
  "trial_params": null
}