{
  "best_metric": 0.09506715089082718,
  "best_model_checkpoint": "./output/checkpoint-90000",
  "epoch": 1.92,
  "eval_steps": 500,
  "global_step": 90000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.01, "learning_rate": 4.973333333333334e-05, "loss": 0.2934, "step": 500},
    {"epoch": 0.01, "eval_loss": 0.22030889987945557, "eval_runtime": 276.3918, "eval_samples_per_second": 79.597, "eval_steps_per_second": 9.95, "step": 500},
    {"epoch": 0.02, "learning_rate": 4.9466666666666665e-05, "loss": 0.2131, "step": 1000},
    {"epoch": 0.02, "eval_loss": 0.18302056193351746, "eval_runtime": 276.3484, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 1000},
    {"epoch": 0.03, "learning_rate": 4.92e-05, "loss": 0.1988, "step": 1500},
    {"epoch": 0.03, "eval_loss": 0.18218165636062622, "eval_runtime": 276.3128, "eval_samples_per_second": 79.62, "eval_steps_per_second": 9.952, "step": 1500},
    {"epoch": 0.04, "learning_rate": 4.8933333333333335e-05, "loss": 0.184, "step": 2000},
    {"epoch": 0.04, "eval_loss": 0.17341837286949158, "eval_runtime": 276.3806, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 2000},
    {"epoch": 0.05, "learning_rate": 4.866666666666667e-05, "loss": 0.1828, "step": 2500},
    {"epoch": 0.05, "eval_loss": 0.16815362870693207, "eval_runtime": 276.4093, "eval_samples_per_second": 79.592, "eval_steps_per_second": 9.949, "step": 2500},
    {"epoch": 0.06, "learning_rate": 4.8400000000000004e-05, "loss": 0.1773, "step": 3000},
    {"epoch": 0.06, "eval_loss": 0.1621473878622055, "eval_runtime": 276.4151, "eval_samples_per_second": 79.59, "eval_steps_per_second": 9.949, "step": 3000},
    {"epoch": 0.07, "learning_rate": 4.8133333333333336e-05, "loss": 0.1704, "step": 3500},
    {"epoch": 0.07, "eval_loss": 0.16105687618255615, "eval_runtime": 276.3655, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 3500},
    {"epoch": 0.09, "learning_rate": 4.7866666666666674e-05, "loss": 0.1746, "step": 4000},
    {"epoch": 0.09, "eval_loss": 0.15701265633106232, "eval_runtime": 276.396, "eval_samples_per_second": 79.596, "eval_steps_per_second": 9.949, "step": 4000},
    {"epoch": 0.1, "learning_rate": 4.76e-05, "loss": 0.1706, "step": 4500},
    {"epoch": 0.1, "eval_loss": 0.15178394317626953, "eval_runtime": 276.3679, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.951, "step": 4500},
    {"epoch": 0.11, "learning_rate": 4.7333333333333336e-05, "loss": 0.1657, "step": 5000},
    {"epoch": 0.11, "eval_loss": 0.1569342315196991, "eval_runtime": 276.3463, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 5000},
    {"epoch": 0.12, "learning_rate": 4.706666666666667e-05, "loss": 0.1625, "step": 5500},
    {"epoch": 0.12, "eval_loss": 0.15309767425060272, "eval_runtime": 276.3956, "eval_samples_per_second": 79.596, "eval_steps_per_second": 9.95, "step": 5500},
    {"epoch": 0.13, "learning_rate": 4.6800000000000006e-05, "loss": 0.1645, "step": 6000},
    {"epoch": 0.13, "eval_loss": 0.14644910395145416, "eval_runtime": 276.4237, "eval_samples_per_second": 79.588, "eval_steps_per_second": 9.948, "step": 6000},
    {"epoch": 0.14, "learning_rate": 4.653333333333334e-05, "loss": 0.1653, "step": 6500},
    {"epoch": 0.14, "eval_loss": 0.153720885515213, "eval_runtime": 276.3604, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 6500},
    {"epoch": 0.15, "learning_rate": 4.626666666666667e-05, "loss": 0.1611, "step": 7000},
    {"epoch": 0.15, "eval_loss": 0.14402781426906586, "eval_runtime": 276.3753, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 7000},
    {"epoch": 0.16, "learning_rate": 4.600000000000001e-05, "loss": 0.1587, "step": 7500},
    {"epoch": 0.16, "eval_loss": 0.14566081762313843, "eval_runtime": 276.3966, "eval_samples_per_second": 79.596, "eval_steps_per_second": 9.949, "step": 7500},
    {"epoch": 0.17, "learning_rate": 4.573333333333333e-05, "loss": 0.156, "step": 8000},
    {"epoch": 0.17, "eval_loss": 0.14253656566143036, "eval_runtime": 276.3214, "eval_samples_per_second": 79.617, "eval_steps_per_second": 9.952, "step": 8000},
    {"epoch": 0.18, "learning_rate": 4.546666666666667e-05, "loss": 0.1515, "step": 8500},
    {"epoch": 0.18, "eval_loss": 0.15111665427684784, "eval_runtime": 276.3261, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 8500},
    {"epoch": 0.19, "learning_rate": 4.52e-05, "loss": 0.1545, "step": 9000},
    {"epoch": 0.19, "eval_loss": 0.1513848453760147, "eval_runtime": 276.3488, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 9000},
    {"epoch": 0.2, "learning_rate": 4.493333333333333e-05, "loss": 0.1497, "step": 9500},
    {"epoch": 0.2, "eval_loss": 0.1414230316877365, "eval_runtime": 276.3155, "eval_samples_per_second": 79.619, "eval_steps_per_second": 9.952, "step": 9500},
    {"epoch": 0.21, "learning_rate": 4.466666666666667e-05, "loss": 0.1489, "step": 10000},
    {"epoch": 0.21, "eval_loss": 0.14184370636940002, "eval_runtime": 276.3098, "eval_samples_per_second": 79.621, "eval_steps_per_second": 9.953, "step": 10000},
    {"epoch": 0.22, "learning_rate": 4.44e-05, "loss": 0.1499, "step": 10500},
    {"epoch": 0.22, "eval_loss": 0.14466029405593872, "eval_runtime": 276.3503, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 10500},
    {"epoch": 0.23, "learning_rate": 4.413333333333334e-05, "loss": 0.1508, "step": 11000},
    {"epoch": 0.23, "eval_loss": 0.14089460670948029, "eval_runtime": 276.3654, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 11000},
    {"epoch": 0.25, "learning_rate": 4.3866666666666665e-05, "loss": 0.1456, "step": 11500},
    {"epoch": 0.25, "eval_loss": 0.13653361797332764, "eval_runtime": 276.3272, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 11500},
    {"epoch": 0.26, "learning_rate": 4.36e-05, "loss": 0.1467, "step": 12000},
    {"epoch": 0.26, "eval_loss": 0.14200258255004883, "eval_runtime": 276.3709, "eval_samples_per_second": 79.603, "eval_steps_per_second": 9.95, "step": 12000},
    {"epoch": 0.27, "learning_rate": 4.3333333333333334e-05, "loss": 0.1474, "step": 12500},
    {"epoch": 0.27, "eval_loss": 0.13534672558307648, "eval_runtime": 276.4143, "eval_samples_per_second": 79.591, "eval_steps_per_second": 9.949, "step": 12500},
    {"epoch": 0.28, "learning_rate": 4.3066666666666665e-05, "loss": 0.1459, "step": 13000},
    {"epoch": 0.28, "eval_loss": 0.14643684029579163, "eval_runtime": 276.3252, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 13000},
    {"epoch": 0.29, "learning_rate": 4.2800000000000004e-05, "loss": 0.1486, "step": 13500},
    {"epoch": 0.29, "eval_loss": 0.13475438952445984, "eval_runtime": 276.3417, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.951, "step": 13500},
    {"epoch": 0.3, "learning_rate": 4.2533333333333335e-05, "loss": 0.1382, "step": 14000},
    {"epoch": 0.3, "eval_loss": 0.13765749335289001, "eval_runtime": 276.2943, "eval_samples_per_second": 79.625, "eval_steps_per_second": 9.953, "step": 14000},
    {"epoch": 0.31, "learning_rate": 4.226666666666667e-05, "loss": 0.1445, "step": 14500},
    {"epoch": 0.31, "eval_loss": 0.13073089718818665, "eval_runtime": 276.3368, "eval_samples_per_second": 79.613, "eval_steps_per_second": 9.952, "step": 14500},
    {"epoch": 0.32, "learning_rate": 4.2e-05, "loss": 0.1408, "step": 15000},
    {"epoch": 0.32, "eval_loss": 0.12947334349155426, "eval_runtime": 276.2912, "eval_samples_per_second": 79.626, "eval_steps_per_second": 9.953, "step": 15000},
    {"epoch": 0.33, "learning_rate": 4.1733333333333336e-05, "loss": 0.1415, "step": 15500},
    {"epoch": 0.33, "eval_loss": 0.13861589133739471, "eval_runtime": 276.3293, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 15500},
    {"epoch": 0.34, "learning_rate": 4.146666666666667e-05, "loss": 0.1395, "step": 16000},
    {"epoch": 0.34, "eval_loss": 0.1310950368642807, "eval_runtime": 276.2775, "eval_samples_per_second": 79.63, "eval_steps_per_second": 9.954, "step": 16000},
    {"epoch": 0.35, "learning_rate": 4.12e-05, "loss": 0.1419, "step": 16500},
    {"epoch": 0.35, "eval_loss": 0.12786632776260376, "eval_runtime": 276.3699, "eval_samples_per_second": 79.603, "eval_steps_per_second": 9.95, "step": 16500},
    {"epoch": 0.36, "learning_rate": 4.093333333333334e-05, "loss": 0.14, "step": 17000},
    {"epoch": 0.36, "eval_loss": 0.13596266508102417, "eval_runtime": 276.3619, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 17000},
    {"epoch": 0.37, "learning_rate": 4.066666666666667e-05, "loss": 0.1342, "step": 17500},
    {"epoch": 0.37, "eval_loss": 0.12955187261104584, "eval_runtime": 276.3881, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 17500},
    {"epoch": 0.38, "learning_rate": 4.0400000000000006e-05, "loss": 0.1434, "step": 18000},
    {"epoch": 0.38, "eval_loss": 0.12723727524280548, "eval_runtime": 276.3491, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 18000},
    {"epoch": 0.39, "learning_rate": 4.013333333333333e-05, "loss": 0.1355, "step": 18500},
    {"epoch": 0.39, "eval_loss": 0.1311383992433548, "eval_runtime": 276.3397, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.952, "step": 18500},
    {"epoch": 0.41, "learning_rate": 3.986666666666667e-05, "loss": 0.1359, "step": 19000},
    {"epoch": 0.41, "eval_loss": 0.13185973465442657, "eval_runtime": 276.3311, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 19000},
    {"epoch": 0.42, "learning_rate": 3.960000000000001e-05, "loss": 0.1367, "step": 19500},
    {"epoch": 0.42, "eval_loss": 0.13154073059558868, "eval_runtime": 276.4261, "eval_samples_per_second": 79.587, "eval_steps_per_second": 9.948, "step": 19500},
    {"epoch": 0.43, "learning_rate": 3.933333333333333e-05, "loss": 0.1359, "step": 20000},
    {"epoch": 0.43, "eval_loss": 0.1254982203245163, "eval_runtime": 276.356, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 20000},
    {"epoch": 0.44, "learning_rate": 3.906666666666667e-05, "loss": 0.1334, "step": 20500},
    {"epoch": 0.44, "eval_loss": 0.12259157747030258, "eval_runtime": 276.3229, "eval_samples_per_second": 79.617, "eval_steps_per_second": 9.952, "step": 20500},
    {"epoch": 0.45, "learning_rate": 3.88e-05, "loss": 0.1332, "step": 21000},
    {"epoch": 0.45, "eval_loss": 0.12745440006256104, "eval_runtime": 276.3506, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 21000},
    {"epoch": 0.46, "learning_rate": 3.853333333333334e-05, "loss": 0.128, "step": 21500},
    {"epoch": 0.46, "eval_loss": 0.13060887157917023, "eval_runtime": 276.3455, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 21500},
    {"epoch": 0.47, "learning_rate": 3.8266666666666664e-05, "loss": 0.1317, "step": 22000},
    {"epoch": 0.47, "eval_loss": 0.12281731516122818, "eval_runtime": 276.3941, "eval_samples_per_second": 79.596, "eval_steps_per_second": 9.95, "step": 22000},
    {"epoch": 0.48, "learning_rate": 3.8e-05, "loss": 0.1341, "step": 22500},
    {"epoch": 0.48, "eval_loss": 0.12897922098636627, "eval_runtime": 276.4178, "eval_samples_per_second": 79.59, "eval_steps_per_second": 9.949, "step": 22500},
    {"epoch": 0.49, "learning_rate": 3.773333333333334e-05, "loss": 0.1299, "step": 23000},
    {"epoch": 0.49, "eval_loss": 0.12343194335699081, "eval_runtime": 276.3421, "eval_samples_per_second": 79.611, "eval_steps_per_second": 9.951, "step": 23000},
    {"epoch": 0.5, "learning_rate": 3.7466666666666665e-05, "loss": 0.1321, "step": 23500},
    {"epoch": 0.5, "eval_loss": 0.12348024547100067, "eval_runtime": 276.3487, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 23500},
    {"epoch": 0.51, "learning_rate": 3.72e-05, "loss": 0.1347, "step": 24000},
    {"epoch": 0.51, "eval_loss": 0.11934816837310791, "eval_runtime": 276.339, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.952, "step": 24000},
    {"epoch": 0.52, "learning_rate": 3.6933333333333334e-05, "loss": 0.1275, "step": 24500},
    {"epoch": 0.52, "eval_loss": 0.1227826476097107, "eval_runtime": 276.3176, "eval_samples_per_second": 79.619, "eval_steps_per_second": 9.952, "step": 24500},
    {"epoch": 0.53, "learning_rate": 3.6666666666666666e-05, "loss": 0.1274, "step": 25000},
    {"epoch": 0.53, "eval_loss": 0.12041452527046204, "eval_runtime": 276.3865, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 25000},
    {"epoch": 0.54, "learning_rate": 3.6400000000000004e-05, "loss": 0.1279, "step": 25500},
    {"epoch": 0.54, "eval_loss": 0.12191332131624222, "eval_runtime": 276.306, "eval_samples_per_second": 79.622, "eval_steps_per_second": 9.953, "step": 25500},
    {"epoch": 0.55, "learning_rate": 3.6133333333333335e-05, "loss": 0.1297, "step": 26000},
    {"epoch": 0.55, "eval_loss": 0.1200505793094635, "eval_runtime": 276.3642, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 26000},
    {"epoch": 0.57, "learning_rate": 3.586666666666667e-05, "loss": 0.1297, "step": 26500},
    {"epoch": 0.57, "eval_loss": 0.1168510764837265, "eval_runtime": 276.3697, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.95, "step": 26500},
    {"epoch": 0.58, "learning_rate": 3.56e-05, "loss": 0.1211, "step": 27000},
    {"epoch": 0.58, "eval_loss": 0.12454237043857574, "eval_runtime": 276.3776, "eval_samples_per_second": 79.601, "eval_steps_per_second": 9.95, "step": 27000},
    {"epoch": 0.59, "learning_rate": 3.5333333333333336e-05, "loss": 0.1229, "step": 27500},
    {"epoch": 0.59, "eval_loss": 0.1179661750793457, "eval_runtime": 276.4214, "eval_samples_per_second": 79.589, "eval_steps_per_second": 9.949, "step": 27500},
    {"epoch": 0.6, "learning_rate": 3.506666666666667e-05, "loss": 0.1248, "step": 28000},
    {"epoch": 0.6, "eval_loss": 0.11693388223648071, "eval_runtime": 276.4022, "eval_samples_per_second": 79.594, "eval_steps_per_second": 9.949, "step": 28000},
    {"epoch": 0.61, "learning_rate": 3.48e-05, "loss": 0.1284, "step": 28500},
    {"epoch": 0.61, "eval_loss": 0.11763223260641098, "eval_runtime": 276.4046, "eval_samples_per_second": 79.593, "eval_steps_per_second": 9.949, "step": 28500},
    {"epoch": 0.62, "learning_rate": 3.453333333333334e-05, "loss": 0.1276, "step": 29000},
    {"epoch": 0.62, "eval_loss": 0.11819904297590256, "eval_runtime": 276.381, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 29000},
    {"epoch": 0.63, "learning_rate": 3.426666666666667e-05, "loss": 0.1291, "step": 29500},
    {"epoch": 0.63, "eval_loss": 0.11751222610473633, "eval_runtime": 276.3896, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 29500},
    {"epoch": 0.64, "learning_rate": 3.4000000000000007e-05, "loss": 0.1248, "step": 30000},
    {"epoch": 0.64, "eval_loss": 0.11959394812583923, "eval_runtime": 276.4181, "eval_samples_per_second": 79.59, "eval_steps_per_second": 9.949, "step": 30000},
    {"epoch": 0.65, "learning_rate": 3.373333333333333e-05, "loss": 0.1238, "step": 30500},
    {"epoch": 0.65, "eval_loss": 0.11936372518539429, "eval_runtime": 276.3317, "eval_samples_per_second": 79.614, "eval_steps_per_second": 9.952, "step": 30500},
    {"epoch": 0.66, "learning_rate": 3.346666666666667e-05, "loss": 0.1282, "step": 31000},
    {"epoch": 0.66, "eval_loss": 0.11961743235588074, "eval_runtime": 276.3769, "eval_samples_per_second": 79.601, "eval_steps_per_second": 9.95, "step": 31000},
    {"epoch": 0.67, "learning_rate": 3.32e-05, "loss": 0.1221, "step": 31500},
    {"epoch": 0.67, "eval_loss": 0.11670618504285812, "eval_runtime": 276.3693, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.95, "step": 31500},
    {"epoch": 0.68, "learning_rate": 3.293333333333333e-05, "loss": 0.1257, "step": 32000},
    {"epoch": 0.68, "eval_loss": 0.11521819233894348, "eval_runtime": 276.3847, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 32000},
    {"epoch": 0.69, "learning_rate": 3.266666666666667e-05, "loss": 0.1228, "step": 32500},
    {"epoch": 0.69, "eval_loss": 0.11396034806966782, "eval_runtime": 276.3846, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 32500},
    {"epoch": 0.7, "learning_rate": 3.24e-05, "loss": 0.1208, "step": 33000},
    {"epoch": 0.7, "eval_loss": 0.11849334090948105, "eval_runtime": 276.4084, "eval_samples_per_second": 79.592, "eval_steps_per_second": 9.949, "step": 33000},
    {"epoch": 0.71, "learning_rate": 3.213333333333334e-05, "loss": 0.1232, "step": 33500},
    {"epoch": 0.71, "eval_loss": 0.11673286557197571, "eval_runtime": 276.3354, "eval_samples_per_second": 79.613, "eval_steps_per_second": 9.952, "step": 33500},
    {"epoch": 0.73, "learning_rate": 3.1866666666666664e-05, "loss": 0.124, "step": 34000},
    {"epoch": 0.73, "eval_loss": 0.11717826873064041, "eval_runtime": 276.3184, "eval_samples_per_second": 79.618, "eval_steps_per_second": 9.952, "step": 34000},
    {"epoch": 0.74, "learning_rate": 3.16e-05, "loss": 0.1207, "step": 34500},
    {"epoch": 0.74, "eval_loss": 0.11567293852567673, "eval_runtime": 276.3296, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 34500},
    {"epoch": 0.75, "learning_rate": 3.1333333333333334e-05, "loss": 0.125, "step": 35000},
    {"epoch": 0.75, "eval_loss": 0.11241094768047333, "eval_runtime": 276.2554, "eval_samples_per_second": 79.636, "eval_steps_per_second": 9.955, "step": 35000},
    {"epoch": 0.76, "learning_rate": 3.1066666666666665e-05, "loss": 0.1247, "step": 35500},
    {"epoch": 0.76, "eval_loss": 0.11267486214637756, "eval_runtime": 276.2607, "eval_samples_per_second": 79.635, "eval_steps_per_second": 9.954, "step": 35500},
    {"epoch": 0.77, "learning_rate": 3.08e-05, "loss": 0.1255, "step": 36000},
    {"epoch": 0.77, "eval_loss": 0.11678201705217361, "eval_runtime": 276.3178, "eval_samples_per_second": 79.618, "eval_steps_per_second": 9.952, "step": 36000},
    {"epoch": 0.78, "learning_rate": 3.0533333333333335e-05, "loss": 0.1195, "step": 36500},
    {"epoch": 0.78, "eval_loss": 0.11176645755767822, "eval_runtime": 276.2722, "eval_samples_per_second": 79.632, "eval_steps_per_second": 9.954, "step": 36500},
    {"epoch": 0.79, "learning_rate": 3.0266666666666666e-05, "loss": 0.1163, "step": 37000},
    {"epoch": 0.79, "eval_loss": 0.11289627104997635, "eval_runtime": 276.2532, "eval_samples_per_second": 79.637, "eval_steps_per_second": 9.955, "step": 37000},
    {"epoch": 0.8, "learning_rate": 3e-05, "loss": 0.1223, "step": 37500},
    {"epoch": 0.8, "eval_loss": 0.11785981804132462, "eval_runtime": 276.3315, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 37500},
    {"epoch": 0.81, "learning_rate": 2.9733333333333336e-05, "loss": 0.1157, "step": 38000},
    {"epoch": 0.81, "eval_loss": 0.10976187139749527, "eval_runtime": 276.3207, "eval_samples_per_second": 79.618, "eval_steps_per_second": 9.952, "step": 38000},
    {"epoch": 0.82, "learning_rate": 2.946666666666667e-05, "loss": 0.1162, "step": 38500},
    {"epoch": 0.82, "eval_loss": 0.11584267020225525, "eval_runtime": 276.2825, "eval_samples_per_second": 79.629, "eval_steps_per_second": 9.954, "step": 38500},
    {"epoch": 0.83, "learning_rate": 2.9199999999999998e-05, "loss": 0.122, "step": 39000},
    {"epoch": 0.83, "eval_loss": 0.10863669961690903, "eval_runtime": 276.3141, "eval_samples_per_second": 79.62, "eval_steps_per_second": 9.952, "step": 39000},
    {"epoch": 0.84, "learning_rate": 2.8933333333333333e-05, "loss": 0.1174, "step": 39500},
    {"epoch": 0.84, "eval_loss": 0.10946591198444366, "eval_runtime": 276.3294, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 39500},
    {"epoch": 0.85, "learning_rate": 2.8666666666666668e-05, "loss": 0.1175, "step": 40000},
    {"epoch": 0.85, "eval_loss": 0.108896404504776, "eval_runtime": 276.3035, "eval_samples_per_second": 79.623, "eval_steps_per_second": 9.953, "step": 40000},
    {"epoch": 0.86, "learning_rate": 2.84e-05, "loss": 0.1186, "step": 40500},
    {"epoch": 0.86, "eval_loss": 0.1097063198685646, "eval_runtime": 276.3409, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.951, "step": 40500},
    {"epoch": 0.87, "learning_rate": 2.8133333333333334e-05, "loss": 0.1199, "step": 41000},
    {"epoch": 0.87, "eval_loss": 0.10556744784116745, "eval_runtime": 276.3338, "eval_samples_per_second": 79.614, "eval_steps_per_second": 9.952, "step": 41000},
    {"epoch": 0.89, "learning_rate": 2.786666666666667e-05, "loss": 0.1166, "step": 41500},
    {"epoch": 0.89, "eval_loss": 0.10955548286437988, "eval_runtime": 276.3015, "eval_samples_per_second": 79.623, "eval_steps_per_second": 9.953, "step": 41500},
    {"epoch": 0.9, "learning_rate": 2.7600000000000003e-05, "loss": 0.1161, "step": 42000},
    {"epoch": 0.9, "eval_loss": 0.10918786376714706, "eval_runtime": 276.3514, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 42000},
    {"epoch": 0.91, "learning_rate": 2.733333333333333e-05, "loss": 0.1157, "step": 42500},
    {"epoch": 0.91, "eval_loss": 0.11171555519104004, "eval_runtime": 276.3517, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 42500},
    {"epoch": 0.92, "learning_rate": 2.706666666666667e-05, "loss": 0.1182, "step": 43000},
    {"epoch": 0.92, "eval_loss": 0.11243414878845215, "eval_runtime": 276.3884, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 43000},
    {"epoch": 0.93, "learning_rate": 2.6800000000000004e-05, "loss": 0.1175, "step": 43500},
    {"epoch": 0.93, "eval_loss": 0.1075085699558258, "eval_runtime": 276.3261, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 43500},
    {"epoch": 0.94, "learning_rate": 2.6533333333333332e-05, "loss": 0.114, "step": 44000},
    {"epoch": 0.94, "eval_loss": 0.11034713685512543, "eval_runtime": 276.3691, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.95, "step": 44000},
    {"epoch": 0.95, "learning_rate": 2.6266666666666667e-05, "loss": 0.1111, "step": 44500},
    {"epoch": 0.95, "eval_loss": 0.10648725181818008, "eval_runtime": 276.3471, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 44500},
    {"epoch": 0.96, "learning_rate": 2.6000000000000002e-05, "loss": 0.117, "step": 45000},
    {"epoch": 0.96, "eval_loss": 0.10601364821195602, "eval_runtime": 276.3431, "eval_samples_per_second": 79.611, "eval_steps_per_second": 9.951, "step": 45000},
    {"epoch": 0.97, "learning_rate": 2.5733333333333337e-05, "loss": 0.1116, "step": 45500},
    {"epoch": 0.97, "eval_loss": 0.11059073358774185, "eval_runtime": 276.3391, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.952, "step": 45500},
    {"epoch": 0.98, "learning_rate": 2.5466666666666668e-05, "loss": 0.1141, "step": 46000},
    {"epoch": 0.98, "eval_loss": 0.10485256463289261, "eval_runtime": 276.3548, "eval_samples_per_second": 79.608, "eval_steps_per_second": 9.951, "step": 46000},
    {"epoch": 0.99, "learning_rate": 2.5200000000000003e-05, "loss": 0.1132, "step": 46500},
    {"epoch": 0.99, "eval_loss": 0.10640579462051392, "eval_runtime": 276.3651, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 46500},
    {"epoch": 1.0, "learning_rate": 2.4933333333333334e-05, "loss": 0.1086, "step": 47000},
    {"epoch": 1.0, "eval_loss": 0.11117997765541077, "eval_runtime": 276.3626, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 47000},
    {"epoch": 1.01, "learning_rate": 2.466666666666667e-05, "loss": 0.0936, "step": 47500},
    {"epoch": 1.01, "eval_loss": 0.11115837842226028, "eval_runtime": 276.4156, "eval_samples_per_second": 79.59, "eval_steps_per_second": 9.949, "step": 47500},
    {"epoch": 1.02, "learning_rate": 2.44e-05, "loss": 0.0932, "step": 48000},
    {"epoch": 1.02, "eval_loss": 0.11185478419065475, "eval_runtime": 276.3532, "eval_samples_per_second": 79.608, "eval_steps_per_second": 9.951, "step": 48000},
    {"epoch": 1.03, "learning_rate": 2.4133333333333335e-05, "loss": 0.0933, "step": 48500},
    {"epoch": 1.03, "eval_loss": 0.11217908561229706, "eval_runtime": 276.4039, "eval_samples_per_second": 79.594, "eval_steps_per_second": 9.949, "step": 48500},
    {"epoch": 1.05, "learning_rate": 2.3866666666666666e-05, "loss": 0.0884, "step": 49000},
    {"epoch": 1.05, "eval_loss": 0.11029790341854095, "eval_runtime": 276.3574, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 49000},
    {"epoch": 1.06, "learning_rate": 2.36e-05, "loss": 0.092, "step": 49500},
    {"epoch": 1.06, "eval_loss": 0.1108555793762207, "eval_runtime": 276.394, "eval_samples_per_second": 79.597, "eval_steps_per_second": 9.95, "step": 49500},
    {"epoch": 1.07, "learning_rate": 2.3333333333333336e-05, "loss": 0.0953, "step": 50000},
    {"epoch": 1.07, "eval_loss": 0.11262986063957214, "eval_runtime": 276.3675, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.951, "step": 50000},
    {"epoch": 1.08, "learning_rate": 2.3066666666666667e-05, "loss": 0.0907, "step": 50500},
    {"epoch": 1.08, "eval_loss": 0.11284729838371277, "eval_runtime": 276.3775, "eval_samples_per_second": 79.601, "eval_steps_per_second": 9.95, "step": 50500},
    {"epoch": 1.09, "learning_rate": 2.2800000000000002e-05, "loss": 0.0876, "step": 51000},
    {"epoch": 1.09, "eval_loss": 0.1101248562335968, "eval_runtime": 276.3334, "eval_samples_per_second": 79.614, "eval_steps_per_second": 9.952, "step": 51000},
    {"epoch": 1.1, "learning_rate": 2.2533333333333333e-05, "loss": 0.0907, "step": 51500},
    {"epoch": 1.1, "eval_loss": 0.11255918443202972, "eval_runtime": 276.3903, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 51500},
    {"epoch": 1.11, "learning_rate": 2.2266666666666668e-05, "loss": 0.0923, "step": 52000},
    {"epoch": 1.11, "eval_loss": 0.11285662651062012, "eval_runtime": 276.4242, "eval_samples_per_second": 79.588, "eval_steps_per_second": 9.948, "step": 52000},
    {"epoch": 1.12, "learning_rate": 2.2000000000000003e-05, "loss": 0.0928, "step": 52500},
    {"epoch": 1.12, "eval_loss": 0.10658019036054611, "eval_runtime": 276.4004, "eval_samples_per_second": 79.595, "eval_steps_per_second": 9.949, "step": 52500},
    {"epoch": 1.13, "learning_rate": 2.1733333333333334e-05, "loss": 0.0879, "step": 53000},
    {"epoch": 1.13, "eval_loss": 0.11677614599466324, "eval_runtime": 276.3855, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 53000},
    {"epoch": 1.14, "learning_rate": 2.146666666666667e-05, "loss": 0.0945, "step": 53500},
    {"epoch": 1.14, "eval_loss": 0.11056175827980042, "eval_runtime": 276.4008, "eval_samples_per_second": 79.595, "eval_steps_per_second": 9.949, "step": 53500},
    {"epoch": 1.15, "learning_rate": 2.12e-05, "loss": 0.0901, "step": 54000},
    {"epoch": 1.15, "eval_loss": 0.11332684755325317, "eval_runtime": 276.3935, "eval_samples_per_second": 79.597, "eval_steps_per_second": 9.95, "step": 54000},
    {"epoch": 1.16, "learning_rate": 2.0933333333333335e-05, "loss": 0.0912, "step": 54500},
    {"epoch": 1.16, "eval_loss": 0.1076674833893776, "eval_runtime": 276.384, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 54500},
    {"epoch": 1.17, "learning_rate": 2.0666666666666666e-05, "loss": 0.0892, "step": 55000},
    {"epoch": 1.17, "eval_loss": 0.11224553734064102, "eval_runtime": 276.4145, "eval_samples_per_second": 79.591, "eval_steps_per_second": 9.949, "step": 55000},
    {"epoch": 1.18, "learning_rate": 2.04e-05, "loss": 0.0922, "step": 55500},
    {"epoch": 1.18, "eval_loss": 0.10986316204071045, "eval_runtime": 276.417, "eval_samples_per_second": 79.59, "eval_steps_per_second": 9.949, "step": 55500},
    {"epoch": 1.19, "learning_rate": 2.0133333333333336e-05, "loss": 0.0933, "step": 56000},
    {"epoch": 1.19, "eval_loss": 0.11572857946157455, "eval_runtime": 276.4334, "eval_samples_per_second": 79.585, "eval_steps_per_second": 9.948, "step": 56000},
    {"epoch": 1.21, "learning_rate": 1.9866666666666667e-05, "loss": 0.0889, "step": 56500},
    {"epoch": 1.21, "eval_loss": 0.11841318756341934, "eval_runtime": 276.3761, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 56500},
    {"epoch": 1.22, "learning_rate": 1.9600000000000002e-05, "loss": 0.0888, "step": 57000},
    {"epoch": 1.22, "eval_loss": 0.11024255305528641, "eval_runtime": 276.3792, "eval_samples_per_second": 79.601, "eval_steps_per_second": 9.95, "step": 57000},
    {"epoch": 1.23, "learning_rate": 1.9333333333333333e-05, "loss": 0.0895, "step": 57500},
    {"epoch": 1.23, "eval_loss": 0.10331451147794724, "eval_runtime": 276.36, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 57500},
    {"epoch": 1.24, "learning_rate": 1.9066666666666668e-05, "loss": 0.0902, "step": 58000},
    {"epoch": 1.24, "eval_loss": 0.10581528395414352, "eval_runtime": 276.4101, "eval_samples_per_second": 79.592, "eval_steps_per_second": 9.949, "step": 58000},
    {"epoch": 1.25, "learning_rate": 1.88e-05, "loss": 0.0904, "step": 58500},
    {"epoch": 1.25, "eval_loss": 0.10806821286678314, "eval_runtime": 276.3803, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 58500},
    {"epoch": 1.26, "learning_rate": 1.8533333333333334e-05, "loss": 0.0904, "step": 59000},
    {"epoch": 1.26, "eval_loss": 0.1039455235004425, "eval_runtime": 276.3739, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 59000},
    {"epoch": 1.27, "learning_rate": 1.826666666666667e-05, "loss": 0.0886, "step": 59500},
    {"epoch": 1.27, "eval_loss": 0.10832499712705612, "eval_runtime": 276.3745, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 59500},
    {"epoch": 1.28, "learning_rate": 1.8e-05, "loss": 0.0885, "step": 60000},
    {"epoch": 1.28, "eval_loss": 0.10517556965351105, "eval_runtime": 276.3381, "eval_samples_per_second": 79.613, "eval_steps_per_second": 9.952, "step": 60000},
    {"epoch": 1.29, "learning_rate": 1.7733333333333335e-05, "loss": 0.0876, "step": 60500},
    {"epoch": 1.29, "eval_loss": 0.10419843345880508, "eval_runtime": 276.3395, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.952, "step": 60500},
    {"epoch": 1.3, "learning_rate": 1.7466666666666667e-05, "loss": 0.0869, "step": 61000},
    {"epoch": 1.3, "eval_loss": 0.10334008932113647, "eval_runtime": 276.3504, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 61000},
    {"epoch": 1.31, "learning_rate": 1.7199999999999998e-05, "loss": 0.0896, "step": 61500},
    {"epoch": 1.31, "eval_loss": 0.10229332745075226, "eval_runtime": 276.3403, "eval_samples_per_second": 79.612, "eval_steps_per_second": 9.951, "step": 61500},
    {"epoch": 1.32, "learning_rate": 1.6933333333333333e-05, "loss": 0.0878, "step": 62000},
    {"epoch": 1.32, "eval_loss": 0.10844781249761581, "eval_runtime": 276.3431, "eval_samples_per_second": 79.611, "eval_steps_per_second": 9.951, "step": 62000},
    {"epoch": 1.33, "learning_rate": 1.6666666666666667e-05, "loss": 0.0866, "step": 62500},
    {"epoch": 1.33, "eval_loss": 0.11091409623622894, "eval_runtime": 276.3677, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.951, "step": 62500},
    {"epoch": 1.34, "learning_rate": 1.6400000000000002e-05, "loss": 0.0878, "step": 63000},
    {"epoch": 1.34, "eval_loss": 0.10312966257333755, "eval_runtime": 276.3743, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 63000},
    {"epoch": 1.35, "learning_rate": 1.6133333333333334e-05, "loss": 0.087, "step": 63500},
    {"epoch": 1.35, "eval_loss": 0.10696660727262497, "eval_runtime": 276.3818, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 63500},
    {"epoch": 1.37, "learning_rate": 1.586666666666667e-05, "loss": 0.0911, "step": 64000},
    {"epoch": 1.37, "eval_loss": 0.10299216210842133, "eval_runtime": 276.3666, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.951, "step": 64000},
    {"epoch": 1.38, "learning_rate": 1.56e-05, "loss": 0.0921, "step": 64500},
    {"epoch": 1.38, "eval_loss": 0.10728349536657333, "eval_runtime": 276.4124, "eval_samples_per_second": 79.591, "eval_steps_per_second": 9.949, "step": 64500},
    {"epoch": 1.39, "learning_rate": 1.5333333333333334e-05, "loss": 0.0907, "step": 65000},
    {"epoch": 1.39, "eval_loss": 0.10825539380311966, "eval_runtime": 276.348, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 65000},
    {"epoch": 1.4, "learning_rate": 1.5066666666666668e-05, "loss": 0.0882, "step": 65500},
    {"epoch": 1.4, "eval_loss": 0.10357914865016937, "eval_runtime": 276.3624, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 65500},
    {"epoch": 1.41, "learning_rate": 1.48e-05, "loss": 0.09, "step": 66000},
    {"epoch": 1.41, "eval_loss": 0.11065088212490082, "eval_runtime": 276.3707, "eval_samples_per_second": 79.603, "eval_steps_per_second": 9.95, "step": 66000},
    {"epoch": 1.42, "learning_rate": 1.4533333333333335e-05, "loss": 0.0875, "step": 66500},
    {"epoch": 1.42, "eval_loss": 0.10276373475790024, "eval_runtime": 276.3568, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 66500},
    {"epoch": 1.43, "learning_rate": 1.4266666666666667e-05, "loss": 0.0863, "step": 67000},
    {"epoch": 1.43, "eval_loss": 0.10605224967002869, "eval_runtime": 276.3877, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 67000},
    {"epoch": 1.44, "learning_rate": 1.4000000000000001e-05, "loss": 0.0848, "step": 67500},
    {"epoch": 1.44, "eval_loss": 0.10788533091545105, "eval_runtime": 276.3524, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 67500},
    {"epoch": 1.45, "learning_rate": 1.3733333333333335e-05, "loss": 0.0842, "step": 68000},
    {"epoch": 1.45, "eval_loss": 0.10388441383838654, "eval_runtime": 276.3332, "eval_samples_per_second": 79.614, "eval_steps_per_second": 9.952, "step": 68000},
    {"epoch": 1.46, "learning_rate": 1.3466666666666666e-05, "loss": 0.0829, "step": 68500},
    {"epoch": 1.46, "eval_loss": 0.10666974633932114, "eval_runtime": 276.3643, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 68500},
    {"epoch": 1.47, "learning_rate": 1.32e-05, "loss": 0.0887, "step": 69000},
    {"epoch": 1.47, "eval_loss": 0.10537286847829819, "eval_runtime": 276.4139, "eval_samples_per_second": 79.591, "eval_steps_per_second": 9.949, "step": 69000},
    {"epoch": 1.48, "learning_rate": 1.2933333333333334e-05, "loss": 0.0828, "step": 69500},
    {"epoch": 1.48, "eval_loss": 0.10367512702941895, "eval_runtime": 276.3815, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 69500},
    {"epoch": 1.49, "learning_rate": 1.2666666666666668e-05, "loss": 0.0834, "step": 70000},
    {"epoch": 1.49, "eval_loss": 0.1013169139623642, "eval_runtime": 276.3722, "eval_samples_per_second": 79.603, "eval_steps_per_second": 9.95, "step": 70000},
    {"epoch": 1.5, "learning_rate": 1.24e-05, "loss": 0.0872, "step": 70500},
    {"epoch": 1.5, "eval_loss": 0.10060115158557892, "eval_runtime": 276.3572, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 70500},
    {"epoch": 1.51, "learning_rate": 1.2133333333333335e-05, "loss": 0.0825, "step": 71000},
    {"epoch": 1.51, "eval_loss": 0.10235247015953064, "eval_runtime": 276.3288, "eval_samples_per_second": 79.615, "eval_steps_per_second": 9.952, "step": 71000},
    {"epoch": 1.53, "learning_rate": 1.1866666666666668e-05, "loss": 0.0845, "step": 71500},
    {"epoch": 1.53, "eval_loss": 0.10633562505245209, "eval_runtime": 276.3334, "eval_samples_per_second": 79.614, "eval_steps_per_second": 9.952, "step": 71500},
    {"epoch": 1.54, "learning_rate": 1.16e-05, "loss": 0.0852, "step": 72000},
    {"epoch": 1.54, "eval_loss": 0.09862720966339111, "eval_runtime": 276.3101, "eval_samples_per_second": 79.621, "eval_steps_per_second": 9.953, "step": 72000},
    {"epoch": 1.55, "learning_rate": 1.1333333333333334e-05, "loss": 0.0852, "step": 72500},
    {"epoch": 1.55, "eval_loss": 0.10181911289691925, "eval_runtime": 276.3491, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 72500},
    {"epoch": 1.56, "learning_rate": 1.1066666666666667e-05, "loss": 0.0834, "step": 73000},
    {"epoch": 1.56, "eval_loss": 0.10195448249578476, "eval_runtime": 276.318, "eval_samples_per_second": 79.618, "eval_steps_per_second": 9.952, "step": 73000},
    {"epoch": 1.57, "learning_rate": 1.08e-05, "loss": 0.086, "step": 73500},
    {"epoch": 1.57, "eval_loss": 0.10091394931077957, "eval_runtime": 276.309, "eval_samples_per_second": 79.621, "eval_steps_per_second": 9.953, "step": 73500},
    {"epoch": 1.58, "learning_rate": 1.0533333333333335e-05, "loss": 0.0816, "step": 74000},
    {"epoch": 1.58, "eval_loss": 0.10171131789684296, "eval_runtime": 276.3371, "eval_samples_per_second": 79.613, "eval_steps_per_second": 9.952, "step": 74000},
    {"epoch": 1.59, "learning_rate": 1.0266666666666668e-05, "loss": 0.0898, "step": 74500},
    {"epoch": 1.59, "eval_loss": 0.09863689541816711, "eval_runtime": 276.3257, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 74500},
    {"epoch": 1.6, "learning_rate": 1e-05, "loss": 0.0845, "step": 75000},
    {"epoch": 1.6, "eval_loss": 0.097959004342556, "eval_runtime": 276.3581, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 75000},
    {"epoch": 1.61, "learning_rate": 9.733333333333334e-06, "loss": 0.082, "step": 75500},
    {"epoch": 1.61, "eval_loss": 0.09837282449007034, "eval_runtime": 276.293, "eval_samples_per_second": 79.626, "eval_steps_per_second": 9.953, "step": 75500},
    {"epoch": 1.62, "learning_rate": 9.466666666666667e-06, "loss": 0.0827, "step": 76000},
    {"epoch": 1.62, "eval_loss": 0.102019764482975, "eval_runtime": 276.3457, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 76000},
    {"epoch": 1.63, "learning_rate": 9.2e-06, "loss": 0.0831, "step": 76500},
    {"epoch": 1.63, "eval_loss": 0.10030708461999893, "eval_runtime": 276.3567, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 76500},
    {"epoch": 1.64, "learning_rate": 8.933333333333333e-06, "loss": 0.0825, "step": 77000},
    {"epoch": 1.64, "eval_loss": 0.09614814817905426, "eval_runtime": 276.3602, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 77000},
    {"epoch": 1.65, "learning_rate": 8.666666666666668e-06, "loss": 0.0846, "step": 77500},
    {"epoch": 1.65, "eval_loss": 0.09760585427284241, "eval_runtime": 276.3738, "eval_samples_per_second": 79.602, "eval_steps_per_second": 9.95, "step": 77500},
    {"epoch": 1.66, "learning_rate": 8.400000000000001e-06, "loss": 0.0829, "step": 78000},
    {"epoch": 1.66, "eval_loss": 0.10096368193626404, "eval_runtime": 276.3807, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 78000},
    {"epoch": 1.67, "learning_rate": 8.133333333333332e-06, "loss": 0.082, "step": 78500},
    {"epoch": 1.67, "eval_loss": 0.09700719267129898, "eval_runtime": 276.4045, "eval_samples_per_second": 79.593, "eval_steps_per_second": 9.949, "step": 78500},
    {"epoch": 1.69, "learning_rate": 7.866666666666667e-06, "loss": 0.0841, "step": 79000},
    {"epoch": 1.69, "eval_loss": 0.09923473000526428, "eval_runtime": 276.4648, "eval_samples_per_second": 79.576, "eval_steps_per_second": 9.947, "step": 79000},
    {"epoch": 1.7, "learning_rate": 7.6e-06, "loss": 0.0812, "step": 79500},
    {"epoch": 1.7, "eval_loss": 0.10030511021614075, "eval_runtime": 276.336, "eval_samples_per_second": 79.613, "eval_steps_per_second": 9.952, "step": 79500},
    {"epoch": 1.71, "learning_rate": 7.333333333333334e-06, "loss": 0.0834, "step": 80000},
    {"epoch": 1.71, "eval_loss": 0.09768765419721603, "eval_runtime": 276.4436, "eval_samples_per_second": 79.582, "eval_steps_per_second": 9.948, "step": 80000},
    {"epoch": 1.72, "learning_rate": 7.066666666666667e-06, "loss": 0.084, "step": 80500},
    {"epoch": 1.72, "eval_loss": 0.0958966389298439, "eval_runtime": 276.4689, "eval_samples_per_second": 79.575, "eval_steps_per_second": 9.947, "step": 80500},
    {"epoch": 1.73, "learning_rate": 6.800000000000001e-06, "loss": 0.0842, "step": 81000},
    {"epoch": 1.73, "eval_loss": 0.09925191104412079, "eval_runtime": 276.3583, "eval_samples_per_second": 79.607, "eval_steps_per_second": 9.951, "step": 81000},
    {"epoch": 1.74, "learning_rate": 6.533333333333333e-06, "loss": 0.0795, "step": 81500},
    {"epoch": 1.74, "eval_loss": 0.09434597939252853, "eval_runtime": 276.3258, "eval_samples_per_second": 79.616, "eval_steps_per_second": 9.952, "step": 81500},
    {"epoch": 1.75, "learning_rate": 6.266666666666666e-06, "loss": 0.0804, "step": 82000},
    {"epoch": 1.75, "eval_loss": 0.09635218232870102, "eval_runtime": 276.3077, "eval_samples_per_second": 79.621, "eval_steps_per_second": 9.953, "step": 82000},
    {"epoch": 1.76, "learning_rate": 6e-06, "loss": 0.0784, "step": 82500},
    {"epoch": 1.76, "eval_loss": 0.10136885195970535, "eval_runtime": 276.3551, "eval_samples_per_second": 79.608, "eval_steps_per_second": 9.951, "step": 82500},
    {"epoch": 1.77, "learning_rate": 5.733333333333333e-06, "loss": 0.0788, "step": 83000},
    {"epoch": 1.77, "eval_loss": 0.09973455220460892, "eval_runtime": 276.3862, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 83000},
    {"epoch": 1.78, "learning_rate": 5.466666666666667e-06, "loss": 0.0803, "step": 83500},
    {"epoch": 1.78, "eval_loss": 0.09547074884176254, "eval_runtime": 276.3536, "eval_samples_per_second": 79.608, "eval_steps_per_second": 9.951, "step": 83500},
    {"epoch": 1.79, "learning_rate": 5.2e-06, "loss": 0.0826, "step": 84000},
    {"epoch": 1.79, "eval_loss": 0.09708509594202042, "eval_runtime": 276.3655, "eval_samples_per_second": 79.605, "eval_steps_per_second": 9.951, "step": 84000},
    {"epoch": 1.8, "learning_rate": 4.933333333333333e-06, "loss": 0.0804, "step": 84500},
    {"epoch": 1.8, "eval_loss": 0.09786985069513321, "eval_runtime": 276.387, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 84500},
    {"epoch": 1.81, "learning_rate": 4.666666666666667e-06, "loss": 0.0822, "step": 85000},
    {"epoch": 1.81, "eval_loss": 0.09602730721235275, "eval_runtime": 276.3871, "eval_samples_per_second": 79.598, "eval_steps_per_second": 9.95, "step": 85000},
    {"epoch": 1.82, "learning_rate": 4.4e-06, "loss": 0.0801, "step": 85500},
    {"epoch": 1.82, "eval_loss": 0.09536262601613998, "eval_runtime": 276.3158, "eval_samples_per_second": 79.619, "eval_steps_per_second": 9.952, "step": 85500},
    {"epoch": 1.83, "learning_rate": 4.133333333333333e-06, "loss": 0.0799, "step": 86000},
    {"epoch": 1.83, "eval_loss": 0.09693248569965363, "eval_runtime": 276.3596, "eval_samples_per_second": 79.606, "eval_steps_per_second": 9.951, "step": 86000},
    {"epoch": 1.85, "learning_rate": 3.866666666666667e-06, "loss": 0.082, "step": 86500},
    {"epoch": 1.85, "eval_loss": 0.09503740072250366, "eval_runtime": 276.3458, "eval_samples_per_second": 79.61, "eval_steps_per_second": 9.951, "step": 86500},
    {"epoch": 1.86, "learning_rate": 3.6e-06, "loss": 0.0825, "step": 87000},
    {"epoch": 1.86, "eval_loss": 0.09553997218608856, "eval_runtime": 276.3691, "eval_samples_per_second": 79.604, "eval_steps_per_second": 9.95, "step": 87000},
    {"epoch": 1.87, "learning_rate": 3.3333333333333333e-06, "loss": 0.0777, "step": 87500},
    {"epoch": 1.87, "eval_loss": 0.09732525050640106, "eval_runtime": 276.2807, "eval_samples_per_second": 79.629, "eval_steps_per_second": 9.954, "step": 87500},
    {"epoch": 1.88, "learning_rate": 3.066666666666667e-06, "loss": 0.0777, "step": 88000},
    {"epoch": 1.88, "eval_loss": 0.09695960581302643, "eval_runtime": 276.387, "eval_samples_per_second": 79.599, "eval_steps_per_second": 9.95, "step": 88000},
    {"epoch": 1.89, "learning_rate": 2.8000000000000003e-06, "loss": 0.08, "step": 88500},
    {"epoch": 1.89, "eval_loss": 0.09696797281503677, "eval_runtime": 276.4257, "eval_samples_per_second": 79.587, "eval_steps_per_second": 9.948, "step": 88500},
    {"epoch": 1.9, "learning_rate": 2.5333333333333334e-06, "loss": 0.0786, "step": 89000},
    {"epoch": 1.9, "eval_loss": 0.09600003063678741, "eval_runtime": 276.3816, "eval_samples_per_second": 79.6, "eval_steps_per_second": 9.95, "step": 89000},
    {"epoch": 1.91, "learning_rate": 2.266666666666667e-06, "loss": 0.0792, "step": 89500},
    {"epoch": 1.91, "eval_loss": 0.09658631682395935, "eval_runtime": 276.6631, "eval_samples_per_second": 79.519, "eval_steps_per_second": 9.94, "step": 89500},
    {"epoch": 1.92, "learning_rate": 2.0000000000000003e-06, "loss": 0.0791, "step": 90000},
    {"epoch": 1.92, "eval_loss": 0.09506715089082718, "eval_runtime": 276.398, "eval_samples_per_second": 79.595, "eval_steps_per_second": 9.949, "step": 90000}
  ],
  "logging_steps": 500,
  "max_steps": 93750,
  "num_train_epochs": 2,
  "save_steps": 5000,
  "total_flos": 7.5777344667648e+17,
  "trial_name": null,
  "trial_params": null
}