{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.0, "global_step": 999, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.2262943855309169e-05, "loss": 3.1653, "step": 27 }, { "epoch": 1.0, "eval_accuracy": 0.547526790579403, "eval_loss": 2.819357395172119, "eval_runtime": 8.7539, "eval_samples_per_second": 50.72, "eval_steps_per_second": 0.228, "step": 27 }, { "epoch": 2.0, "learning_rate": 1.4841962570206113e-05, "loss": 2.7725, "step": 54 }, { "epoch": 2.0, "eval_accuracy": 0.5574515806383172, "eval_loss": 2.6864047050476074, "eval_runtime": 7.4551, "eval_samples_per_second": 59.556, "eval_steps_per_second": 0.268, "step": 54 }, { "epoch": 3.0, "learning_rate": 1.6350591807078892e-05, "loss": 2.6256, "step": 81 }, { "epoch": 3.0, "eval_accuracy": 0.5682560989844753, "eval_loss": 2.602532148361206, "eval_runtime": 7.5503, "eval_samples_per_second": 58.806, "eval_steps_per_second": 0.265, "step": 81 }, { "epoch": 4.0, "learning_rate": 1.7420981285103056e-05, "loss": 2.5044, "step": 108 }, { "epoch": 4.0, "eval_accuracy": 0.601172152498224, "eval_loss": 2.341265916824341, "eval_runtime": 7.5499, "eval_samples_per_second": 58.809, "eval_steps_per_second": 0.265, "step": 108 }, { "epoch": 5.0, "learning_rate": 1.825123986666868e-05, "loss": 2.4348, "step": 135 }, { "epoch": 5.0, "eval_accuracy": 0.6008952865794787, "eval_loss": 2.3422300815582275, "eval_runtime": 7.5767, "eval_samples_per_second": 58.601, "eval_steps_per_second": 0.264, "step": 135 }, { "epoch": 6.0, "learning_rate": 1.892961052197583e-05, "loss": 2.42, "step": 162 }, { "epoch": 6.0, "eval_accuracy": 0.5931487564523698, "eval_loss": 2.3707046508789062, "eval_runtime": 7.5549, "eval_samples_per_second": 58.77, "eval_steps_per_second": 0.265, "step": 162 }, { "epoch": 7.0, "learning_rate": 1.9503164738653782e-05, "loss": 2.334, "step": 189 }, { "epoch": 7.0, "eval_accuracy": 0.6161689510150978, "eval_loss": 2.23409366607666, "eval_runtime": 7.5922, "eval_samples_per_second": 58.481, "eval_steps_per_second": 0.263, "step": 189 }, { "epoch": 8.0, "learning_rate": 1.9999999999999998e-05, "loss": 2.288, "step": 216 }, { "epoch": 8.0, "eval_accuracy": 0.6175385999100584, "eval_loss": 2.2085084915161133, "eval_runtime": 7.5018, "eval_samples_per_second": 59.186, "eval_steps_per_second": 0.267, "step": 216 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 2.2954, "step": 243 }, { "epoch": 9.0, "eval_accuracy": 0.6139404159640247, "eval_loss": 2.2152740955352783, "eval_runtime": 7.565, "eval_samples_per_second": 58.691, "eval_steps_per_second": 0.264, "step": 243 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 2.2934, "step": 270 }, { "epoch": 10.0, "eval_accuracy": 0.6153211306628938, "eval_loss": 2.232224225997925, "eval_runtime": 7.5858, "eval_samples_per_second": 58.531, "eval_steps_per_second": 0.264, "step": 270 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 2.2283, "step": 297 }, { "epoch": 11.0, "eval_accuracy": 0.6139165373025295, "eval_loss": 2.231621503829956, "eval_runtime": 6.8069, "eval_samples_per_second": 65.228, "eval_steps_per_second": 0.294, "step": 297 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 2.2228, "step": 324 }, { "epoch": 12.0, "eval_accuracy": 0.6216490772616965, "eval_loss": 2.1547019481658936, "eval_runtime": 7.5498, "eval_samples_per_second": 58.809, "eval_steps_per_second": 0.265, "step": 324 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 2.2178, "step": 351 }, { "epoch": 13.0, "eval_accuracy": 0.6255061354433901, "eval_loss": 2.1324307918548584, "eval_runtime": 7.037, "eval_samples_per_second": 63.095, "eval_steps_per_second": 0.284, "step": 351 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 2.1743, "step": 378 }, { "epoch": 14.0, "eval_accuracy": 0.6387313908007641, "eval_loss": 2.018895387649536, "eval_runtime": 7.5158, "eval_samples_per_second": 59.076, "eval_steps_per_second": 0.266, "step": 378 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 2.0488, "step": 405 }, { "epoch": 15.0, "eval_accuracy": 0.6706725076959508, "eval_loss": 1.7761304378509521, "eval_runtime": 6.6483, "eval_samples_per_second": 66.784, "eval_steps_per_second": 0.301, "step": 405 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.9293, "step": 432 }, { "epoch": 16.0, "eval_accuracy": 0.6976124177972629, "eval_loss": 1.5710580348968506, "eval_runtime": 7.0351, "eval_samples_per_second": 63.112, "eval_steps_per_second": 0.284, "step": 432 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.824, "step": 459 }, { "epoch": 17.0, "eval_accuracy": 0.7190793783123921, "eval_loss": 1.4179129600524902, "eval_runtime": 7.5577, "eval_samples_per_second": 58.748, "eval_steps_per_second": 0.265, "step": 459 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.677, "step": 486 }, { "epoch": 18.0, "eval_accuracy": 0.7287676129762578, "eval_loss": 1.3340463638305664, "eval_runtime": 7.6139, "eval_samples_per_second": 58.315, "eval_steps_per_second": 0.263, "step": 486 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.5681, "step": 513 }, { "epoch": 19.0, "eval_accuracy": 0.7393688419248894, "eval_loss": 1.262519121170044, "eval_runtime": 7.6652, "eval_samples_per_second": 57.924, "eval_steps_per_second": 0.261, "step": 513 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.4292, "step": 540 }, { "epoch": 20.0, "eval_accuracy": 0.7635153050702649, "eval_loss": 1.123979091644287, "eval_runtime": 7.5316, "eval_samples_per_second": 58.951, "eval_steps_per_second": 0.266, "step": 540 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.3347, "step": 567 }, { "epoch": 21.0, "eval_accuracy": 0.7732615942452775, "eval_loss": 1.0541319847106934, "eval_runtime": 7.5273, "eval_samples_per_second": 58.985, "eval_steps_per_second": 0.266, "step": 567 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.2435, "step": 594 }, { "epoch": 22.0, "eval_accuracy": 0.7811072373166627, "eval_loss": 1.0116688013076782, "eval_runtime": 7.6371, "eval_samples_per_second": 58.137, "eval_steps_per_second": 0.262, "step": 594 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.1747, "step": 621 }, { "epoch": 23.0, "eval_accuracy": 0.7842111556166677, "eval_loss": 0.9821351766586304, "eval_runtime": 7.6429, "eval_samples_per_second": 58.093, "eval_steps_per_second": 0.262, "step": 621 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.1536, "step": 648 }, { "epoch": 24.0, "eval_accuracy": 0.7867322173128384, "eval_loss": 0.964832603931427, "eval_runtime": 6.912, "eval_samples_per_second": 64.236, "eval_steps_per_second": 0.289, "step": 648 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.0971, "step": 675 }, { "epoch": 25.0, "eval_accuracy": 0.7866705302810779, "eval_loss": 0.9783045053482056, "eval_runtime": 7.5744, "eval_samples_per_second": 58.618, "eval_steps_per_second": 0.264, "step": 675 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.0809, "step": 702 }, { "epoch": 26.0, "eval_accuracy": 0.7918671517427532, "eval_loss": 0.9400666356086731, "eval_runtime": 7.1416, "eval_samples_per_second": 62.171, "eval_steps_per_second": 0.28, "step": 702 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.0653, "step": 729 }, { "epoch": 27.0, "eval_accuracy": 0.7834480395236599, "eval_loss": 0.9835608005523682, "eval_runtime": 7.5584, "eval_samples_per_second": 58.743, "eval_steps_per_second": 0.265, "step": 729 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.0341, "step": 756 }, { "epoch": 28.0, "eval_accuracy": 0.7897628339837887, "eval_loss": 0.9648067951202393, "eval_runtime": 7.5415, "eval_samples_per_second": 58.874, "eval_steps_per_second": 0.265, "step": 756 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.0056, "step": 783 }, { "epoch": 29.0, "eval_accuracy": 0.7985819252651742, "eval_loss": 0.9186079502105713, "eval_runtime": 7.5907, "eval_samples_per_second": 58.492, "eval_steps_per_second": 0.263, "step": 783 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.9993, "step": 810 }, { "epoch": 30.0, "eval_accuracy": 0.7981126511353583, "eval_loss": 0.9101386666297913, "eval_runtime": 7.191, "eval_samples_per_second": 61.744, "eval_steps_per_second": 0.278, "step": 810 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 0.9849, "step": 837 }, { "epoch": 31.0, "eval_accuracy": 0.7893089053803339, "eval_loss": 0.9685505628585815, "eval_runtime": 7.5619, "eval_samples_per_second": 58.716, "eval_steps_per_second": 0.264, "step": 837 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 0.9799, "step": 864 }, { "epoch": 32.0, "eval_accuracy": 0.7988627229152401, "eval_loss": 0.9036659002304077, "eval_runtime": 7.5514, "eval_samples_per_second": 58.797, "eval_steps_per_second": 0.265, "step": 864 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 0.957, "step": 891 }, { "epoch": 33.0, "eval_accuracy": 0.7937915742793792, "eval_loss": 0.9258220195770264, "eval_runtime": 7.548, "eval_samples_per_second": 58.824, "eval_steps_per_second": 0.265, "step": 891 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 0.9535, "step": 918 }, { "epoch": 34.0, "eval_accuracy": 0.7996154854496198, "eval_loss": 0.8886759877204895, "eval_runtime": 7.5277, "eval_samples_per_second": 58.982, "eval_steps_per_second": 0.266, "step": 918 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 0.9343, "step": 945 }, { "epoch": 35.0, "eval_accuracy": 0.8085179621980223, "eval_loss": 0.8577904105186462, "eval_runtime": 6.7518, "eval_samples_per_second": 65.76, "eval_steps_per_second": 0.296, "step": 945 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 0.9346, "step": 972 }, { "epoch": 36.0, "eval_accuracy": 0.8009545281527245, "eval_loss": 0.8876005411148071, "eval_runtime": 6.6328, "eval_samples_per_second": 66.94, "eval_steps_per_second": 0.302, "step": 972 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 0.9187, "step": 999 }, { "epoch": 37.0, "eval_accuracy": 0.8115537848605577, "eval_loss": 0.8389037251472473, "eval_runtime": 7.5847, "eval_samples_per_second": 58.539, "eval_steps_per_second": 0.264, "step": 999 } ], "max_steps": 1080, "num_train_epochs": 40, "total_flos": 311085194280960.0, "trial_name": null, "trial_params": null }