{ "best_metric": 0.46716374158859253, "best_model_checkpoint": "edukasi-eksyar/checkpoint-150", "epoch": 2.0, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 8.004520416259766, "learning_rate": 2.0000000000000003e-06, "loss": 0.7474, "step": 3 }, { "epoch": 0.08, "grad_norm": 4.838222980499268, "learning_rate": 4.000000000000001e-06, "loss": 0.7234, "step": 6 }, { "epoch": 0.12, "grad_norm": 6.006165981292725, "learning_rate": 6e-06, "loss": 0.6806, "step": 9 }, { "epoch": 0.16, "grad_norm": 5.214205265045166, "learning_rate": 8.000000000000001e-06, "loss": 0.6827, "step": 12 }, { "epoch": 0.2, "grad_norm": Infinity, "learning_rate": 9.333333333333334e-06, "loss": 0.6964, "step": 15 }, { "epoch": 0.24, "grad_norm": 4.9306416511535645, "learning_rate": 1.1333333333333334e-05, "loss": 0.7053, "step": 18 }, { "epoch": 0.28, "grad_norm": 4.851315975189209, "learning_rate": 1.3333333333333333e-05, "loss": 0.7157, "step": 21 }, { "epoch": 0.32, "grad_norm": 5.420584678649902, "learning_rate": 1.5333333333333334e-05, "loss": 0.6733, "step": 24 }, { "epoch": 0.36, "grad_norm": 5.671692848205566, "learning_rate": 1.7333333333333336e-05, "loss": 0.7817, "step": 27 }, { "epoch": 0.4, "grad_norm": 4.638021469116211, "learning_rate": 1.9333333333333333e-05, "loss": 0.6695, "step": 30 }, { "epoch": 0.44, "grad_norm": 5.361118793487549, "learning_rate": 2.1333333333333335e-05, "loss": 0.6291, "step": 33 }, { "epoch": 0.48, "grad_norm": 4.608893871307373, "learning_rate": 2.3333333333333336e-05, "loss": 0.62, "step": 36 }, { "epoch": 0.52, "grad_norm": 4.534895420074463, "learning_rate": 2.5333333333333337e-05, "loss": 0.5813, "step": 39 }, { "epoch": 0.56, "grad_norm": 10.90993595123291, "learning_rate": 2.733333333333333e-05, "loss": 0.5732, "step": 42 }, { "epoch": 0.6, "grad_norm": 11.462884902954102, "learning_rate": 2.9333333333333336e-05, "loss": 0.8245, "step": 45 }, { "epoch": 0.64, "grad_norm": 5.430261611938477, "learning_rate": 3.1333333333333334e-05, "loss": 0.4529, "step": 48 }, { "epoch": 0.68, "grad_norm": 7.559244155883789, "learning_rate": 3.3333333333333335e-05, "loss": 0.5846, "step": 51 }, { "epoch": 0.72, "grad_norm": 6.550118446350098, "learning_rate": 3.5333333333333336e-05, "loss": 0.6978, "step": 54 }, { "epoch": 0.76, "grad_norm": 4.697792053222656, "learning_rate": 3.733333333333334e-05, "loss": 0.724, "step": 57 }, { "epoch": 0.8, "grad_norm": 6.832012176513672, "learning_rate": 3.933333333333333e-05, "loss": 0.6196, "step": 60 }, { "epoch": 0.84, "grad_norm": 4.360001087188721, "learning_rate": 4.133333333333333e-05, "loss": 0.4207, "step": 63 }, { "epoch": 0.88, "grad_norm": 5.691288948059082, "learning_rate": 4.3333333333333334e-05, "loss": 0.4241, "step": 66 }, { "epoch": 0.92, "grad_norm": 13.889620780944824, "learning_rate": 4.5333333333333335e-05, "loss": 0.6089, "step": 69 }, { "epoch": 0.96, "grad_norm": 7.6536865234375, "learning_rate": 4.7333333333333336e-05, "loss": 0.5699, "step": 72 }, { "epoch": 1.0, "grad_norm": 1.8993226289749146, "learning_rate": 4.933333333333334e-05, "loss": 0.3402, "step": 75 }, { "epoch": 1.0, "eval_accuracy": 0.7516778523489933, "eval_auc": 0.8772072072072072, "eval_f1": 0.7861271676300579, "eval_loss": 0.5175233483314514, "eval_precision": 0.6868686868686869, "eval_recall": 0.918918918918919, "eval_runtime": 0.3003, "eval_samples_per_second": 496.248, "eval_steps_per_second": 33.305, "step": 75 }, { 
"epoch": 1.04, "grad_norm": 8.317619323730469, "learning_rate": 4.9851851851851855e-05, "loss": 0.3189, "step": 78 }, { "epoch": 1.08, "grad_norm": 4.061596870422363, "learning_rate": 4.962962962962963e-05, "loss": 0.2408, "step": 81 }, { "epoch": 1.12, "grad_norm": 9.929864883422852, "learning_rate": 4.940740740740741e-05, "loss": 0.4488, "step": 84 }, { "epoch": 1.16, "grad_norm": 13.264701843261719, "learning_rate": 4.918518518518519e-05, "loss": 0.3286, "step": 87 }, { "epoch": 1.2, "grad_norm": 10.807778358459473, "learning_rate": 4.896296296296297e-05, "loss": 0.3772, "step": 90 }, { "epoch": 1.24, "grad_norm": 7.425344944000244, "learning_rate": 4.874074074074074e-05, "loss": 0.1312, "step": 93 }, { "epoch": 1.28, "grad_norm": 9.521306037902832, "learning_rate": 4.851851851851852e-05, "loss": 0.3179, "step": 96 }, { "epoch": 1.32, "grad_norm": 31.535367965698242, "learning_rate": 4.82962962962963e-05, "loss": 0.3478, "step": 99 }, { "epoch": 1.3599999999999999, "grad_norm": 27.85064125061035, "learning_rate": 4.814814814814815e-05, "loss": 0.4613, "step": 102 }, { "epoch": 1.4, "grad_norm": 10.678465843200684, "learning_rate": 4.792592592592593e-05, "loss": 1.0055, "step": 105 }, { "epoch": 1.44, "grad_norm": 15.3538236618042, "learning_rate": 4.770370370370371e-05, "loss": 0.1905, "step": 108 }, { "epoch": 1.48, "grad_norm": 18.999788284301758, "learning_rate": 4.7481481481481483e-05, "loss": 0.3383, "step": 111 }, { "epoch": 1.52, "grad_norm": 9.190664291381836, "learning_rate": 4.7259259259259266e-05, "loss": 0.4572, "step": 114 }, { "epoch": 1.56, "grad_norm": 12.094901084899902, "learning_rate": 4.703703703703704e-05, "loss": 0.4445, "step": 117 }, { "epoch": 1.6, "grad_norm": 8.674372673034668, "learning_rate": 4.681481481481482e-05, "loss": 0.3431, "step": 120 }, { "epoch": 1.6400000000000001, "grad_norm": 0.5121658444404602, "learning_rate": 4.6592592592592595e-05, "loss": 0.3897, "step": 123 }, { "epoch": 1.6800000000000002, "grad_norm": 8.509323120117188, "learning_rate": 4.637037037037038e-05, "loss": 0.4224, "step": 126 }, { "epoch": 1.72, "grad_norm": 13.501930236816406, "learning_rate": 4.6148148148148154e-05, "loss": 0.2524, "step": 129 }, { "epoch": 1.76, "grad_norm": 20.332229614257812, "learning_rate": 4.592592592592593e-05, "loss": 0.6092, "step": 132 }, { "epoch": 1.8, "grad_norm": 7.666742324829102, "learning_rate": 4.5703703703703706e-05, "loss": 0.2536, "step": 135 }, { "epoch": 1.8399999999999999, "grad_norm": 6.118281364440918, "learning_rate": 4.548148148148149e-05, "loss": 0.2912, "step": 138 }, { "epoch": 1.88, "grad_norm": 5.993144512176514, "learning_rate": 4.5259259259259265e-05, "loss": 0.357, "step": 141 }, { "epoch": 1.92, "grad_norm": 15.134876251220703, "learning_rate": 4.503703703703704e-05, "loss": 0.2685, "step": 144 }, { "epoch": 1.96, "grad_norm": 0.8912419676780701, "learning_rate": 4.481481481481482e-05, "loss": 0.2885, "step": 147 }, { "epoch": 2.0, "grad_norm": 0.28381115198135376, "learning_rate": 4.4592592592592594e-05, "loss": 0.3773, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.7986577181208053, "eval_auc": 0.9084684684684685, "eval_f1": 0.810126582278481, "eval_loss": 0.46716374158859253, "eval_precision": 0.7619047619047619, "eval_recall": 0.8648648648648649, "eval_runtime": 0.3042, "eval_samples_per_second": 489.871, "eval_steps_per_second": 32.877, "step": 150 } ], "logging_steps": 3, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { 
"args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 78012427914240.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }