Mistral-Nemo-12B-Instruct-SFT / trainer_log.jsonl
chchen's picture
Training in progress, step 168
406fbbb verified
{"current_steps": 10, "total_steps": 168, "loss": 1.2612, "learning_rate": 2.647058823529412e-06, "epoch": 0.17777777777777778, "percentage": 5.95, "elapsed_time": "0:00:29", "remaining_time": "0:07:48"}
{"current_steps": 20, "total_steps": 168, "loss": 1.2578, "learning_rate": 4.997836020254328e-06, "epoch": 0.35555555555555557, "percentage": 11.9, "elapsed_time": "0:00:57", "remaining_time": "0:07:06"}
{"current_steps": 30, "total_steps": 168, "loss": 1.0594, "learning_rate": 4.922489359292928e-06, "epoch": 0.5333333333333333, "percentage": 17.86, "elapsed_time": "0:01:25", "remaining_time": "0:06:32"}
{"current_steps": 40, "total_steps": 168, "loss": 0.7524, "learning_rate": 4.7426609101991605e-06, "epoch": 0.7111111111111111, "percentage": 23.81, "elapsed_time": "0:01:53", "remaining_time": "0:06:03"}
{"current_steps": 50, "total_steps": 168, "loss": 0.5438, "learning_rate": 4.466106660773884e-06, "epoch": 0.8888888888888888, "percentage": 29.76, "elapsed_time": "0:02:21", "remaining_time": "0:05:34"}
{"current_steps": 60, "total_steps": 168, "loss": 0.481, "learning_rate": 4.104754375481665e-06, "epoch": 1.0666666666666667, "percentage": 35.71, "elapsed_time": "0:02:49", "remaining_time": "0:05:05"}
{"current_steps": 70, "total_steps": 168, "loss": 0.2657, "learning_rate": 3.674189151845515e-06, "epoch": 1.2444444444444445, "percentage": 41.67, "elapsed_time": "0:03:17", "remaining_time": "0:04:36"}
{"current_steps": 80, "total_steps": 168, "loss": 0.1834, "learning_rate": 3.1929812363354766e-06, "epoch": 1.4222222222222223, "percentage": 47.62, "elapsed_time": "0:03:45", "remaining_time": "0:04:08"}
{"current_steps": 90, "total_steps": 168, "loss": 0.164, "learning_rate": 2.68188509100236e-06, "epoch": 1.6, "percentage": 53.57, "elapsed_time": "0:04:13", "remaining_time": "0:03:40"}
{"current_steps": 100, "total_steps": 168, "loss": 0.1039, "learning_rate": 2.1629442550539283e-06, "epoch": 1.7777777777777777, "percentage": 59.52, "elapsed_time": "0:04:41", "remaining_time": "0:03:11"}
{"current_steps": 110, "total_steps": 168, "loss": 0.1299, "learning_rate": 1.6585406086279847e-06, "epoch": 1.9555555555555557, "percentage": 65.48, "elapsed_time": "0:05:10", "remaining_time": "0:02:43"}
{"current_steps": 120, "total_steps": 168, "loss": 0.0871, "learning_rate": 1.1904290439459974e-06, "epoch": 2.1333333333333333, "percentage": 71.43, "elapsed_time": "0:05:38", "remaining_time": "0:02:15"}
{"current_steps": 130, "total_steps": 168, "loss": 0.1068, "learning_rate": 7.787991784095e-07, "epoch": 2.311111111111111, "percentage": 77.38, "elapsed_time": "0:06:05", "remaining_time": "0:01:46"}
{"current_steps": 140, "total_steps": 168, "loss": 0.1238, "learning_rate": 4.414045778845144e-07, "epoch": 2.488888888888889, "percentage": 83.33, "elapsed_time": "0:06:33", "remaining_time": "0:01:18"}
{"current_steps": 150, "total_steps": 168, "loss": 0.1024, "learning_rate": 1.927970467097573e-07, "epoch": 2.6666666666666665, "percentage": 89.29, "elapsed_time": "0:07:01", "remaining_time": "0:00:50"}
{"current_steps": 160, "total_steps": 168, "loss": 0.0923, "learning_rate": 4.369900944435734e-08, "epoch": 2.8444444444444446, "percentage": 95.24, "elapsed_time": "0:07:29", "remaining_time": "0:00:22"}
{"current_steps": 168, "total_steps": 168, "epoch": 2.986666666666667, "percentage": 100.0, "elapsed_time": "0:07:53", "remaining_time": "0:00:00"}