{ "best_metric": 1.2864845991134644, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.0033690735889898675, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.684536794494934e-05, "eval_loss": 1.7877610921859741, "eval_runtime": 1523.965, "eval_samples_per_second": 16.401, "eval_steps_per_second": 4.1, "step": 1 }, { "epoch": 0.00016845367944949338, "grad_norm": 2.426577091217041, "learning_rate": 4.34e-05, "loss": 1.3885, "step": 10 }, { "epoch": 0.00033690735889898676, "grad_norm": 1.8919321298599243, "learning_rate": 8.68e-05, "loss": 1.2056, "step": 20 }, { "epoch": 0.0005053610383484802, "grad_norm": 1.6490509510040283, "learning_rate": 0.0001302, "loss": 1.1562, "step": 30 }, { "epoch": 0.0006738147177979735, "grad_norm": 1.9635969400405884, "learning_rate": 0.0001736, "loss": 1.2593, "step": 40 }, { "epoch": 0.0008422683972474669, "grad_norm": 2.6593801975250244, "learning_rate": 0.000217, "loss": 1.4405, "step": 50 }, { "epoch": 0.0008422683972474669, "eval_loss": 1.2864845991134644, "eval_runtime": 1523.8808, "eval_samples_per_second": 16.402, "eval_steps_per_second": 4.101, "step": 50 }, { "epoch": 0.0010107220766969603, "grad_norm": 1.269593596458435, "learning_rate": 0.00021673569945319091, "loss": 1.118, "step": 60 }, { "epoch": 0.0011791757561464536, "grad_norm": 1.360644817352295, "learning_rate": 0.00021594408545846038, "loss": 1.1112, "step": 70 }, { "epoch": 0.001347629435595947, "grad_norm": 1.5711684226989746, "learning_rate": 0.0002146290146796179, "loss": 1.2133, "step": 80 }, { "epoch": 0.0015160831150454403, "grad_norm": 2.1071293354034424, "learning_rate": 0.0002127968940093076, "loss": 1.3488, "step": 90 }, { "epoch": 0.0016845367944949338, "grad_norm": 3.1953670978546143, "learning_rate": 0.00021045664935527106, "loss": 1.4487, "step": 100 }, { "epoch": 0.0016845367944949338, "eval_loss": 1.4198808670043945, "eval_runtime": 1523.9284, "eval_samples_per_second": 16.402, "eval_steps_per_second": 4.101, "step": 100 }, { "epoch": 0.0018529904739444272, "grad_norm": 1.4960129261016846, "learning_rate": 0.00020761968215422217, "loss": 1.1833, "step": 110 }, { "epoch": 0.0020214441533939207, "grad_norm": 1.6748398542404175, "learning_rate": 0.00020429981382519356, "loss": 1.1911, "step": 120 }, { "epoch": 0.0021898978328434137, "grad_norm": 1.67674720287323, "learning_rate": 0.00020051321843297219, "loss": 1.166, "step": 130 }, { "epoch": 0.002358351512292907, "grad_norm": 1.8275052309036255, "learning_rate": 0.0001962783438896818, "loss": 1.2584, "step": 140 }, { "epoch": 0.0025268051917424006, "grad_norm": 3.4226462841033936, "learning_rate": 0.0001916158220784091, "loss": 1.4667, "step": 150 }, { "epoch": 0.0025268051917424006, "eval_loss": 1.3877193927764893, "eval_runtime": 1524.1947, "eval_samples_per_second": 16.399, "eval_steps_per_second": 4.1, "step": 150 }, { "epoch": 0.002695258871191894, "grad_norm": 1.4711071252822876, "learning_rate": 0.00018654836833674362, "loss": 1.2392, "step": 160 }, { "epoch": 0.0028637125506413875, "grad_norm": 1.2519170045852661, "learning_rate": 0.0001811006707899361, "loss": 1.1692, "step": 170 }, { "epoch": 0.0030321662300908806, "grad_norm": 1.6822906732559204, "learning_rate": 0.0001752992700728339, "loss": 1.2182, "step": 180 }, { "epoch": 0.003200619909540374, "grad_norm": 1.8712016344070435, "learning_rate": 0.00016917243002657602, "loss": 1.2522, "step": 190 }, { "epoch": 0.0033690735889898675, "grad_norm": 4.799688339233398, "learning_rate": 0.00016275, "loss": 1.4872, "step": 200 }, { "epoch": 0.0033690735889898675, "eval_loss": 1.3280550241470337, "eval_runtime": 1524.6163, "eval_samples_per_second": 16.394, "eval_steps_per_second": 4.099, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.790140366598963e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }