|
{ |
|
"best_metric": 3.009519338607788, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 0.3870967741935484, |
|
"eval_steps": 50, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0025806451612903226, |
|
"grad_norm": 2.02890944480896, |
|
"learning_rate": 1.007e-05, |
|
"loss": 3.2543, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0025806451612903226, |
|
"eval_loss": 3.4940507411956787, |
|
"eval_runtime": 39.1123, |
|
"eval_samples_per_second": 4.167, |
|
"eval_steps_per_second": 1.048, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005161290322580645, |
|
"grad_norm": 2.1640987396240234, |
|
"learning_rate": 2.014e-05, |
|
"loss": 3.1859, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007741935483870968, |
|
"grad_norm": 2.289116859436035, |
|
"learning_rate": 3.0209999999999997e-05, |
|
"loss": 3.1102, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01032258064516129, |
|
"grad_norm": 1.9029440879821777, |
|
"learning_rate": 4.028e-05, |
|
"loss": 3.1912, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.012903225806451613, |
|
"grad_norm": 1.5423721075057983, |
|
"learning_rate": 5.035e-05, |
|
"loss": 3.3264, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015483870967741935, |
|
"grad_norm": 1.1242539882659912, |
|
"learning_rate": 6.0419999999999994e-05, |
|
"loss": 2.9698, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01806451612903226, |
|
"grad_norm": 1.6809178590774536, |
|
"learning_rate": 7.049e-05, |
|
"loss": 2.8629, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02064516129032258, |
|
"grad_norm": 1.3089873790740967, |
|
"learning_rate": 8.056e-05, |
|
"loss": 2.9564, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023225806451612905, |
|
"grad_norm": 0.9860974550247192, |
|
"learning_rate": 9.062999999999999e-05, |
|
"loss": 3.0032, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.025806451612903226, |
|
"grad_norm": 0.9577404856681824, |
|
"learning_rate": 0.0001007, |
|
"loss": 2.7733, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02838709677419355, |
|
"grad_norm": 1.073362946510315, |
|
"learning_rate": 0.00010017, |
|
"loss": 3.0293, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03096774193548387, |
|
"grad_norm": 0.9577970504760742, |
|
"learning_rate": 9.963999999999999e-05, |
|
"loss": 3.0549, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03354838709677419, |
|
"grad_norm": 0.8062331676483154, |
|
"learning_rate": 9.910999999999999e-05, |
|
"loss": 2.9548, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03612903225806452, |
|
"grad_norm": 0.8570857644081116, |
|
"learning_rate": 9.858e-05, |
|
"loss": 3.1505, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03870967741935484, |
|
"grad_norm": 0.9095686674118042, |
|
"learning_rate": 9.805e-05, |
|
"loss": 2.8758, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04129032258064516, |
|
"grad_norm": 0.8106794953346252, |
|
"learning_rate": 9.752e-05, |
|
"loss": 2.9163, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04387096774193548, |
|
"grad_norm": 0.8135427832603455, |
|
"learning_rate": 9.698999999999999e-05, |
|
"loss": 3.1537, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04645161290322581, |
|
"grad_norm": 0.773794412612915, |
|
"learning_rate": 9.646e-05, |
|
"loss": 2.9923, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04903225806451613, |
|
"grad_norm": 0.776435911655426, |
|
"learning_rate": 9.593e-05, |
|
"loss": 2.994, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05161290322580645, |
|
"grad_norm": 0.702139139175415, |
|
"learning_rate": 9.539999999999999e-05, |
|
"loss": 2.8807, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05419354838709677, |
|
"grad_norm": 0.6850553750991821, |
|
"learning_rate": 9.487e-05, |
|
"loss": 3.0033, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0567741935483871, |
|
"grad_norm": 0.6869837045669556, |
|
"learning_rate": 9.434e-05, |
|
"loss": 2.7906, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05935483870967742, |
|
"grad_norm": 0.7767460942268372, |
|
"learning_rate": 9.381e-05, |
|
"loss": 2.8578, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06193548387096774, |
|
"grad_norm": 0.747832179069519, |
|
"learning_rate": 9.327999999999999e-05, |
|
"loss": 2.9695, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 0.7731716632843018, |
|
"learning_rate": 9.274999999999999e-05, |
|
"loss": 2.9062, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06709677419354838, |
|
"grad_norm": 0.8283132910728455, |
|
"learning_rate": 9.222e-05, |
|
"loss": 3.1388, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0696774193548387, |
|
"grad_norm": 0.8168233036994934, |
|
"learning_rate": 9.169e-05, |
|
"loss": 3.0032, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07225806451612904, |
|
"grad_norm": 0.7814300060272217, |
|
"learning_rate": 9.116e-05, |
|
"loss": 3.0617, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07483870967741936, |
|
"grad_norm": 0.8907764554023743, |
|
"learning_rate": 9.062999999999999e-05, |
|
"loss": 2.8873, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07741935483870968, |
|
"grad_norm": 0.897400975227356, |
|
"learning_rate": 9.01e-05, |
|
"loss": 3.0865, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.916833758354187, |
|
"learning_rate": 8.957e-05, |
|
"loss": 2.9955, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08258064516129032, |
|
"grad_norm": 0.9079581499099731, |
|
"learning_rate": 8.903999999999999e-05, |
|
"loss": 2.9475, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08516129032258064, |
|
"grad_norm": 1.2848162651062012, |
|
"learning_rate": 8.850999999999999e-05, |
|
"loss": 3.0854, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08774193548387096, |
|
"grad_norm": 1.0301451683044434, |
|
"learning_rate": 8.798e-05, |
|
"loss": 3.2001, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09032258064516129, |
|
"grad_norm": 0.9421987533569336, |
|
"learning_rate": 8.745e-05, |
|
"loss": 2.7962, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09290322580645162, |
|
"grad_norm": 1.2306110858917236, |
|
"learning_rate": 8.692e-05, |
|
"loss": 3.1929, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09548387096774194, |
|
"grad_norm": 1.1693624258041382, |
|
"learning_rate": 8.638999999999999e-05, |
|
"loss": 3.1043, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09806451612903226, |
|
"grad_norm": 1.169491171836853, |
|
"learning_rate": 8.586e-05, |
|
"loss": 2.7704, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10064516129032258, |
|
"grad_norm": 1.1204756498336792, |
|
"learning_rate": 8.533e-05, |
|
"loss": 3.2268, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1032258064516129, |
|
"grad_norm": 1.1709730625152588, |
|
"learning_rate": 8.479999999999999e-05, |
|
"loss": 2.9685, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10580645161290322, |
|
"grad_norm": 1.2603025436401367, |
|
"learning_rate": 8.427e-05, |
|
"loss": 2.9147, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10838709677419354, |
|
"grad_norm": 1.5371952056884766, |
|
"learning_rate": 8.374e-05, |
|
"loss": 2.9618, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11096774193548387, |
|
"grad_norm": 1.4978915452957153, |
|
"learning_rate": 8.321e-05, |
|
"loss": 3.0561, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1135483870967742, |
|
"grad_norm": 1.8759700059890747, |
|
"learning_rate": 8.268e-05, |
|
"loss": 3.5141, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11612903225806452, |
|
"grad_norm": 1.6922487020492554, |
|
"learning_rate": 8.214999999999999e-05, |
|
"loss": 3.229, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11870967741935484, |
|
"grad_norm": 1.9749841690063477, |
|
"learning_rate": 8.162e-05, |
|
"loss": 3.2539, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12129032258064516, |
|
"grad_norm": 2.2926204204559326, |
|
"learning_rate": 8.108999999999998e-05, |
|
"loss": 3.1085, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12387096774193548, |
|
"grad_norm": 4.02115535736084, |
|
"learning_rate": 8.056e-05, |
|
"loss": 3.1481, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12645161290322582, |
|
"grad_norm": 4.62841272354126, |
|
"learning_rate": 8.003e-05, |
|
"loss": 3.7727, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 6.652851581573486, |
|
"learning_rate": 7.95e-05, |
|
"loss": 3.9126, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"eval_loss": 3.116821765899658, |
|
"eval_runtime": 38.3432, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 1.069, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13161290322580646, |
|
"grad_norm": 1.2032880783081055, |
|
"learning_rate": 7.897e-05, |
|
"loss": 2.8811, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13419354838709677, |
|
"grad_norm": 0.9579372406005859, |
|
"learning_rate": 7.843999999999999e-05, |
|
"loss": 2.9946, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1367741935483871, |
|
"grad_norm": 0.7830987572669983, |
|
"learning_rate": 7.790999999999999e-05, |
|
"loss": 2.8823, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1393548387096774, |
|
"grad_norm": 0.6758972406387329, |
|
"learning_rate": 7.738e-05, |
|
"loss": 2.9346, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14193548387096774, |
|
"grad_norm": 0.6744924187660217, |
|
"learning_rate": 7.685e-05, |
|
"loss": 2.8026, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14451612903225808, |
|
"grad_norm": 0.6925913691520691, |
|
"learning_rate": 7.632e-05, |
|
"loss": 2.9121, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14709677419354839, |
|
"grad_norm": 0.6952354311943054, |
|
"learning_rate": 7.578999999999999e-05, |
|
"loss": 2.9217, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14967741935483872, |
|
"grad_norm": 0.6015385985374451, |
|
"learning_rate": 7.526e-05, |
|
"loss": 2.911, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15225806451612903, |
|
"grad_norm": 0.6113649606704712, |
|
"learning_rate": 7.473e-05, |
|
"loss": 2.8453, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15483870967741936, |
|
"grad_norm": 0.6024471521377563, |
|
"learning_rate": 7.419999999999999e-05, |
|
"loss": 2.7338, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15741935483870967, |
|
"grad_norm": 0.6171099543571472, |
|
"learning_rate": 7.367e-05, |
|
"loss": 2.7157, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.6734641790390015, |
|
"learning_rate": 7.314e-05, |
|
"loss": 3.0369, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1625806451612903, |
|
"grad_norm": 0.6411603689193726, |
|
"learning_rate": 7.261e-05, |
|
"loss": 2.9175, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16516129032258065, |
|
"grad_norm": 0.6401498913764954, |
|
"learning_rate": 7.208e-05, |
|
"loss": 2.9601, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16774193548387098, |
|
"grad_norm": 0.6685578227043152, |
|
"learning_rate": 7.154999999999999e-05, |
|
"loss": 2.9055, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1703225806451613, |
|
"grad_norm": 0.6739301681518555, |
|
"learning_rate": 7.102e-05, |
|
"loss": 2.8973, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17290322580645162, |
|
"grad_norm": 0.6716254353523254, |
|
"learning_rate": 7.049e-05, |
|
"loss": 2.8507, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17548387096774193, |
|
"grad_norm": 0.6408494710922241, |
|
"learning_rate": 6.996e-05, |
|
"loss": 2.8232, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17806451612903226, |
|
"grad_norm": 0.6635752320289612, |
|
"learning_rate": 6.943e-05, |
|
"loss": 3.088, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.18064516129032257, |
|
"grad_norm": 0.710978090763092, |
|
"learning_rate": 6.89e-05, |
|
"loss": 2.8905, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1832258064516129, |
|
"grad_norm": 0.7735083103179932, |
|
"learning_rate": 6.837e-05, |
|
"loss": 2.9583, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18580645161290324, |
|
"grad_norm": 0.7552114725112915, |
|
"learning_rate": 6.784e-05, |
|
"loss": 2.8666, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18838709677419355, |
|
"grad_norm": 0.8119356036186218, |
|
"learning_rate": 6.730999999999999e-05, |
|
"loss": 3.0893, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.19096774193548388, |
|
"grad_norm": 0.7227278351783752, |
|
"learning_rate": 6.678e-05, |
|
"loss": 2.9174, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 0.7297806143760681, |
|
"learning_rate": 6.625e-05, |
|
"loss": 2.9618, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19612903225806452, |
|
"grad_norm": 0.7950009107589722, |
|
"learning_rate": 6.572e-05, |
|
"loss": 2.8738, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19870967741935483, |
|
"grad_norm": 0.7869434952735901, |
|
"learning_rate": 6.519e-05, |
|
"loss": 2.7843, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.20129032258064516, |
|
"grad_norm": 0.8707318902015686, |
|
"learning_rate": 6.466e-05, |
|
"loss": 3.1622, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.20387096774193547, |
|
"grad_norm": 0.8520801663398743, |
|
"learning_rate": 6.413e-05, |
|
"loss": 3.0377, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2064516129032258, |
|
"grad_norm": 0.9687163233757019, |
|
"learning_rate": 6.359999999999999e-05, |
|
"loss": 3.0816, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20903225806451614, |
|
"grad_norm": 0.8639389872550964, |
|
"learning_rate": 6.306999999999999e-05, |
|
"loss": 2.8825, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.21161290322580645, |
|
"grad_norm": 1.042325735092163, |
|
"learning_rate": 6.254000000000001e-05, |
|
"loss": 2.9446, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.21419354838709678, |
|
"grad_norm": 0.9391971230506897, |
|
"learning_rate": 6.201e-05, |
|
"loss": 3.0544, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2167741935483871, |
|
"grad_norm": 1.0379990339279175, |
|
"learning_rate": 6.148e-05, |
|
"loss": 3.1278, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21935483870967742, |
|
"grad_norm": 1.0052063465118408, |
|
"learning_rate": 6.095e-05, |
|
"loss": 3.0154, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.22193548387096773, |
|
"grad_norm": 1.1294814348220825, |
|
"learning_rate": 6.0419999999999994e-05, |
|
"loss": 2.9711, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.22451612903225807, |
|
"grad_norm": 1.1187207698822021, |
|
"learning_rate": 5.988999999999999e-05, |
|
"loss": 2.7353, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2270967741935484, |
|
"grad_norm": 1.1556931734085083, |
|
"learning_rate": 5.9359999999999994e-05, |
|
"loss": 3.0027, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2296774193548387, |
|
"grad_norm": 1.4021755456924438, |
|
"learning_rate": 5.8830000000000004e-05, |
|
"loss": 2.9716, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.23225806451612904, |
|
"grad_norm": 1.32869291305542, |
|
"learning_rate": 5.83e-05, |
|
"loss": 3.2768, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23483870967741935, |
|
"grad_norm": 1.6008881330490112, |
|
"learning_rate": 5.777e-05, |
|
"loss": 3.2203, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.23741935483870968, |
|
"grad_norm": 1.739267349243164, |
|
"learning_rate": 5.7239999999999994e-05, |
|
"loss": 3.0869, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.6709328889846802, |
|
"learning_rate": 5.671e-05, |
|
"loss": 3.1174, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.24258064516129033, |
|
"grad_norm": 2.5117313861846924, |
|
"learning_rate": 5.6179999999999994e-05, |
|
"loss": 3.2951, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.24516129032258063, |
|
"grad_norm": 2.1630053520202637, |
|
"learning_rate": 5.5650000000000004e-05, |
|
"loss": 3.3658, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24774193548387097, |
|
"grad_norm": 2.027144193649292, |
|
"learning_rate": 5.512e-05, |
|
"loss": 3.3735, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2503225806451613, |
|
"grad_norm": 2.5083370208740234, |
|
"learning_rate": 5.459e-05, |
|
"loss": 2.9347, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.25290322580645164, |
|
"grad_norm": 2.995940685272217, |
|
"learning_rate": 5.406e-05, |
|
"loss": 3.5815, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.25548387096774194, |
|
"grad_norm": 3.9194164276123047, |
|
"learning_rate": 5.353e-05, |
|
"loss": 3.3064, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 7.003715991973877, |
|
"learning_rate": 5.2999999999999994e-05, |
|
"loss": 3.744, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"eval_loss": 3.0592901706695557, |
|
"eval_runtime": 38.3187, |
|
"eval_samples_per_second": 4.254, |
|
"eval_steps_per_second": 1.07, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26064516129032256, |
|
"grad_norm": 0.8486892580986023, |
|
"learning_rate": 5.246999999999999e-05, |
|
"loss": 2.855, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2632258064516129, |
|
"grad_norm": 0.8181604146957397, |
|
"learning_rate": 5.194e-05, |
|
"loss": 3.0667, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2658064516129032, |
|
"grad_norm": 0.7249521017074585, |
|
"learning_rate": 5.141e-05, |
|
"loss": 2.6855, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26838709677419353, |
|
"grad_norm": 0.7008607387542725, |
|
"learning_rate": 5.088e-05, |
|
"loss": 2.9785, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2709677419354839, |
|
"grad_norm": 0.6476490497589111, |
|
"learning_rate": 5.035e-05, |
|
"loss": 3.0082, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2735483870967742, |
|
"grad_norm": 0.618168294429779, |
|
"learning_rate": 4.9819999999999994e-05, |
|
"loss": 2.8595, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2761290322580645, |
|
"grad_norm": 0.6012650728225708, |
|
"learning_rate": 4.929e-05, |
|
"loss": 2.7147, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2787096774193548, |
|
"grad_norm": 0.6011011600494385, |
|
"learning_rate": 4.876e-05, |
|
"loss": 2.7046, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2812903225806452, |
|
"grad_norm": 0.9905216693878174, |
|
"learning_rate": 4.823e-05, |
|
"loss": 2.8049, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2838709677419355, |
|
"grad_norm": 0.5935449600219727, |
|
"learning_rate": 4.7699999999999994e-05, |
|
"loss": 2.8319, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2864516129032258, |
|
"grad_norm": 0.5935178399085999, |
|
"learning_rate": 4.717e-05, |
|
"loss": 2.9189, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28903225806451616, |
|
"grad_norm": 0.6392220854759216, |
|
"learning_rate": 4.6639999999999994e-05, |
|
"loss": 2.6776, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.29161290322580646, |
|
"grad_norm": 0.6458805799484253, |
|
"learning_rate": 4.611e-05, |
|
"loss": 2.8857, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.29419354838709677, |
|
"grad_norm": 0.6118318438529968, |
|
"learning_rate": 4.558e-05, |
|
"loss": 2.7875, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2967741935483871, |
|
"grad_norm": 0.6118870377540588, |
|
"learning_rate": 4.505e-05, |
|
"loss": 2.7211, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.29935483870967744, |
|
"grad_norm": 0.6446405053138733, |
|
"learning_rate": 4.4519999999999994e-05, |
|
"loss": 2.9338, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.30193548387096775, |
|
"grad_norm": 0.641838014125824, |
|
"learning_rate": 4.399e-05, |
|
"loss": 2.6843, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.30451612903225805, |
|
"grad_norm": 0.609104335308075, |
|
"learning_rate": 4.346e-05, |
|
"loss": 2.848, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.30709677419354836, |
|
"grad_norm": 0.6547131538391113, |
|
"learning_rate": 4.293e-05, |
|
"loss": 2.9458, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3096774193548387, |
|
"grad_norm": 0.7462600469589233, |
|
"learning_rate": 4.2399999999999994e-05, |
|
"loss": 3.1013, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31225806451612903, |
|
"grad_norm": 0.6465981602668762, |
|
"learning_rate": 4.187e-05, |
|
"loss": 2.7645, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.31483870967741934, |
|
"grad_norm": 0.7226136922836304, |
|
"learning_rate": 4.134e-05, |
|
"loss": 3.041, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3174193548387097, |
|
"grad_norm": 0.8358485698699951, |
|
"learning_rate": 4.081e-05, |
|
"loss": 2.996, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.823491632938385, |
|
"learning_rate": 4.028e-05, |
|
"loss": 3.0795, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 0.7459017634391785, |
|
"learning_rate": 3.975e-05, |
|
"loss": 2.9079, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3251612903225806, |
|
"grad_norm": 0.7506483197212219, |
|
"learning_rate": 3.9219999999999994e-05, |
|
"loss": 2.7378, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.327741935483871, |
|
"grad_norm": 0.8168128728866577, |
|
"learning_rate": 3.869e-05, |
|
"loss": 3.1751, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3303225806451613, |
|
"grad_norm": 0.8122110962867737, |
|
"learning_rate": 3.816e-05, |
|
"loss": 3.0125, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3329032258064516, |
|
"grad_norm": 0.8495927453041077, |
|
"learning_rate": 3.763e-05, |
|
"loss": 2.903, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.33548387096774196, |
|
"grad_norm": 0.9135481119155884, |
|
"learning_rate": 3.7099999999999994e-05, |
|
"loss": 3.0412, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.33806451612903227, |
|
"grad_norm": 0.8771790862083435, |
|
"learning_rate": 3.657e-05, |
|
"loss": 2.9565, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3406451612903226, |
|
"grad_norm": 0.899596631526947, |
|
"learning_rate": 3.604e-05, |
|
"loss": 3.1069, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3432258064516129, |
|
"grad_norm": 1.055289626121521, |
|
"learning_rate": 3.551e-05, |
|
"loss": 2.9057, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.34580645161290324, |
|
"grad_norm": 1.0215229988098145, |
|
"learning_rate": 3.498e-05, |
|
"loss": 3.0173, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.34838709677419355, |
|
"grad_norm": 1.0191702842712402, |
|
"learning_rate": 3.445e-05, |
|
"loss": 3.1466, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.35096774193548386, |
|
"grad_norm": 1.1208670139312744, |
|
"learning_rate": 3.392e-05, |
|
"loss": 3.2075, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3535483870967742, |
|
"grad_norm": 1.1400412321090698, |
|
"learning_rate": 3.339e-05, |
|
"loss": 3.159, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3561290322580645, |
|
"grad_norm": 1.1518625020980835, |
|
"learning_rate": 3.286e-05, |
|
"loss": 3.1083, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.35870967741935483, |
|
"grad_norm": 1.382748007774353, |
|
"learning_rate": 3.233e-05, |
|
"loss": 2.9339, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.36129032258064514, |
|
"grad_norm": 1.279389500617981, |
|
"learning_rate": 3.1799999999999994e-05, |
|
"loss": 2.9156, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3638709677419355, |
|
"grad_norm": 1.5063074827194214, |
|
"learning_rate": 3.1270000000000004e-05, |
|
"loss": 3.4519, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3664516129032258, |
|
"grad_norm": 1.417640209197998, |
|
"learning_rate": 3.074e-05, |
|
"loss": 3.0448, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3690322580645161, |
|
"grad_norm": 1.8814888000488281, |
|
"learning_rate": 3.0209999999999997e-05, |
|
"loss": 2.7309, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3716129032258065, |
|
"grad_norm": 2.1574666500091553, |
|
"learning_rate": 2.9679999999999997e-05, |
|
"loss": 3.2975, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3741935483870968, |
|
"grad_norm": 2.018376350402832, |
|
"learning_rate": 2.915e-05, |
|
"loss": 2.935, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3767741935483871, |
|
"grad_norm": 2.580366373062134, |
|
"learning_rate": 2.8619999999999997e-05, |
|
"loss": 3.0637, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3793548387096774, |
|
"grad_norm": 2.8553295135498047, |
|
"learning_rate": 2.8089999999999997e-05, |
|
"loss": 3.2642, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.38193548387096776, |
|
"grad_norm": 4.084458351135254, |
|
"learning_rate": 2.756e-05, |
|
"loss": 3.5309, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.38451612903225807, |
|
"grad_norm": 7.080618858337402, |
|
"learning_rate": 2.703e-05, |
|
"loss": 3.7543, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": 9.050718307495117, |
|
"learning_rate": 2.6499999999999997e-05, |
|
"loss": 4.0362, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"eval_loss": 3.009519338607788, |
|
"eval_runtime": 37.8132, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 1.084, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2642772657058611e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|