diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,116088 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.999864308652585, - "eval_steps": 500, - "global_step": 16581, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 2.409638554216868e-08, - "loss": 0.9743, - "step": 1 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 4.819277108433736e-08, - "loss": 0.9869, - "step": 2 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 7.228915662650603e-08, - "loss": 1.0026, - "step": 3 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 9.638554216867472e-08, - "loss": 0.9549, - "step": 4 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 1.204819277108434e-07, - "loss": 1.1009, - "step": 5 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 1.4457831325301206e-07, - "loss": 1.0205, - "step": 6 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 1.6867469879518075e-07, - "loss": 1.0837, - "step": 7 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 1.9277108433734944e-07, - "loss": 1.0184, - "step": 8 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 2.1686746987951808e-07, - "loss": 0.9589, - "step": 9 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 2.409638554216868e-07, - "loss": 0.9009, - "step": 10 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 2.6506024096385546e-07, - "loss": 1.036, - "step": 11 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 2.891566265060241e-07, - "loss": 0.9584, - "step": 12 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 3.1325301204819284e-07, - "loss": 0.9773, - "step": 13 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 3.373493975903615e-07, - "loss": 1.1104, - "step": 14 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 3.614457831325301e-07, - "loss": 1.1513, - "step": 15 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 3.855421686746989e-07, - "loss": 1.0492, - "step": 16 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 4.0963855421686754e-07, - "loss": 1.0191, - "step": 17 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 4.3373493975903615e-07, - "loss": 0.9468, - "step": 18 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 4.578313253012048e-07, - "loss": 1.165, - "step": 19 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 4.819277108433736e-07, - "loss": 1.0841, - "step": 20 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 5.060240963855422e-07, - "loss": 1.0159, - "step": 21 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 5.301204819277109e-07, - "loss": 0.9761, - "step": 22 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 5.542168674698796e-07, - "loss": 1.0644, - "step": 23 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 5.783132530120482e-07, - "loss": 1.0985, - "step": 24 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 6.024096385542169e-07, - "loss": 0.9739, - "step": 25 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 6.265060240963857e-07, - "loss": 1.025, - "step": 26 - }, - { - "epoch": 0.0, - "grad_norm": 0.0, - "learning_rate": 6.506024096385542e-07, - "loss": 0.764, - "step": 27 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 6.74698795180723e-07, - "loss": 0.9388, - "step": 28 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 6.987951807228917e-07, - "loss": 0.9837, - "step": 29 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 7.228915662650602e-07, - "loss": 1.0166, - "step": 30 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 7.46987951807229e-07, - "loss": 0.9783, - "step": 31 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 7.710843373493978e-07, - "loss": 0.8671, - "step": 32 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 7.951807228915663e-07, - "loss": 0.9103, - "step": 33 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 8.192771084337351e-07, - "loss": 0.8963, - "step": 34 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 8.433734939759036e-07, - "loss": 0.9083, - "step": 35 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 8.674698795180723e-07, - "loss": 1.0757, - "step": 36 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 8.915662650602411e-07, - "loss": 0.8977, - "step": 37 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 9.156626506024096e-07, - "loss": 0.7709, - "step": 38 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 9.397590361445784e-07, - "loss": 0.8283, - "step": 39 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 9.638554216867472e-07, - "loss": 0.9474, - "step": 40 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 9.879518072289156e-07, - "loss": 0.9703, - "step": 41 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.0120481927710845e-06, - "loss": 1.0451, - "step": 42 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.0361445783132532e-06, - "loss": 0.9943, - "step": 43 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.0602409638554218e-06, - "loss": 0.9351, - "step": 44 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.0843373493975905e-06, - "loss": 0.926, - "step": 45 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.1084337349397592e-06, - "loss": 0.7793, - "step": 46 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.1325301204819278e-06, - "loss": 0.9989, - "step": 47 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.1566265060240965e-06, - "loss": 0.8573, - "step": 48 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.1807228915662651e-06, - "loss": 0.9114, - "step": 49 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.2048192771084338e-06, - "loss": 0.9209, - "step": 50 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.2289156626506025e-06, - "loss": 0.7381, - "step": 51 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.2530120481927713e-06, - "loss": 0.8311, - "step": 52 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.2771084337349398e-06, - "loss": 0.8233, - "step": 53 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.3012048192771085e-06, - "loss": 0.9181, - "step": 54 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.3253012048192773e-06, - "loss": 0.7713, - "step": 55 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.349397590361446e-06, - "loss": 0.8403, - "step": 56 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.3734939759036144e-06, - "loss": 0.8452, - "step": 57 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.3975903614457833e-06, - "loss": 0.8614, - "step": 58 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.421686746987952e-06, - "loss": 0.8549, - "step": 59 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.4457831325301204e-06, - "loss": 0.7805, - "step": 60 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.4698795180722893e-06, - "loss": 0.9936, - "step": 61 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.493975903614458e-06, - "loss": 0.9183, - "step": 62 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.5180722891566266e-06, - "loss": 0.8625, - "step": 63 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.5421686746987955e-06, - "loss": 0.935, - "step": 64 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.566265060240964e-06, - "loss": 0.8709, - "step": 65 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.5903614457831326e-06, - "loss": 0.9301, - "step": 66 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.6144578313253013e-06, - "loss": 0.8777, - "step": 67 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.6385542168674702e-06, - "loss": 0.9432, - "step": 68 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.6626506024096386e-06, - "loss": 0.8644, - "step": 69 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.6867469879518073e-06, - "loss": 0.8981, - "step": 70 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.7108433734939762e-06, - "loss": 0.9605, - "step": 71 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.7349397590361446e-06, - "loss": 0.8459, - "step": 72 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.7590361445783133e-06, - "loss": 0.8602, - "step": 73 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.7831325301204822e-06, - "loss": 0.8717, - "step": 74 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.8072289156626508e-06, - "loss": 0.8069, - "step": 75 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.8313253012048193e-06, - "loss": 0.7453, - "step": 76 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.8554216867469881e-06, - "loss": 0.7395, - "step": 77 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.8795180722891568e-06, - "loss": 0.7982, - "step": 78 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.9036144578313255e-06, - "loss": 0.9521, - "step": 79 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.9277108433734943e-06, - "loss": 0.8014, - "step": 80 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.951807228915663e-06, - "loss": 0.9159, - "step": 81 - }, - { - "epoch": 0.01, - "grad_norm": 0.0, - "learning_rate": 1.9759036144578312e-06, - "loss": 0.7533, - "step": 82 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.7818, - "step": 83 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.024096385542169e-06, - "loss": 1.0239, - "step": 84 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.0481927710843377e-06, - "loss": 0.8102, - "step": 85 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.0722891566265063e-06, - "loss": 0.8132, - "step": 86 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.096385542168675e-06, - "loss": 0.7891, - "step": 87 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.1204819277108437e-06, - "loss": 0.8195, - "step": 88 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.1445783132530123e-06, - "loss": 0.9196, - "step": 89 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.168674698795181e-06, - "loss": 0.9107, - "step": 90 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.1927710843373496e-06, - "loss": 0.8555, - "step": 91 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.2168674698795183e-06, - "loss": 0.8682, - "step": 92 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.240963855421687e-06, - "loss": 0.794, - "step": 93 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.2650602409638556e-06, - "loss": 0.847, - "step": 94 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.2891566265060243e-06, - "loss": 0.8618, - "step": 95 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.313253012048193e-06, - "loss": 0.7924, - "step": 96 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.3373493975903616e-06, - "loss": 0.8853, - "step": 97 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.3614457831325303e-06, - "loss": 0.8866, - "step": 98 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.385542168674699e-06, - "loss": 0.9155, - "step": 99 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.4096385542168676e-06, - "loss": 0.8864, - "step": 100 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.4337349397590363e-06, - "loss": 0.9367, - "step": 101 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.457831325301205e-06, - "loss": 0.7861, - "step": 102 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.4819277108433736e-06, - "loss": 0.941, - "step": 103 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.5060240963855427e-06, - "loss": 0.7836, - "step": 104 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.530120481927711e-06, - "loss": 0.9304, - "step": 105 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.5542168674698796e-06, - "loss": 0.8453, - "step": 106 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.5783132530120487e-06, - "loss": 0.8871, - "step": 107 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.602409638554217e-06, - "loss": 0.9257, - "step": 108 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.6265060240963856e-06, - "loss": 0.8205, - "step": 109 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.6506024096385547e-06, - "loss": 0.8407, - "step": 110 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.674698795180723e-06, - "loss": 0.8103, - "step": 111 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.698795180722892e-06, - "loss": 0.9891, - "step": 112 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.7228915662650607e-06, - "loss": 0.8464, - "step": 113 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.746987951807229e-06, - "loss": 0.9959, - "step": 114 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.771084337349398e-06, - "loss": 0.8855, - "step": 115 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.7951807228915666e-06, - "loss": 0.9141, - "step": 116 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.819277108433735e-06, - "loss": 0.6838, - "step": 117 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.843373493975904e-06, - "loss": 0.976, - "step": 118 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.8674698795180726e-06, - "loss": 0.7756, - "step": 119 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.891566265060241e-06, - "loss": 0.8786, - "step": 120 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.91566265060241e-06, - "loss": 0.813, - "step": 121 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.9397590361445786e-06, - "loss": 0.8737, - "step": 122 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.9638554216867473e-06, - "loss": 0.8688, - "step": 123 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 2.987951807228916e-06, - "loss": 0.7912, - "step": 124 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.012048192771085e-06, - "loss": 0.8959, - "step": 125 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.0361445783132533e-06, - "loss": 0.904, - "step": 126 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.060240963855422e-06, - "loss": 0.8827, - "step": 127 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.084337349397591e-06, - "loss": 0.8428, - "step": 128 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.1084337349397593e-06, - "loss": 0.8092, - "step": 129 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.132530120481928e-06, - "loss": 0.9043, - "step": 130 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.156626506024096e-06, - "loss": 0.7381, - "step": 131 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.1807228915662653e-06, - "loss": 0.7471, - "step": 132 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.204819277108434e-06, - "loss": 1.0998, - "step": 133 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.2289156626506026e-06, - "loss": 0.7702, - "step": 134 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.2530120481927713e-06, - "loss": 0.7539, - "step": 135 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.2771084337349403e-06, - "loss": 0.7938, - "step": 136 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.3012048192771086e-06, - "loss": 0.9088, - "step": 137 - }, - { - "epoch": 0.02, - "grad_norm": 0.0, - "learning_rate": 3.3253012048192772e-06, - "loss": 0.806, - "step": 138 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.3493975903614463e-06, - "loss": 1.0596, - "step": 139 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.3734939759036146e-06, - "loss": 0.7884, - "step": 140 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.3975903614457832e-06, - "loss": 0.8047, - "step": 141 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.4216867469879523e-06, - "loss": 0.8864, - "step": 142 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.4457831325301206e-06, - "loss": 0.7966, - "step": 143 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.4698795180722892e-06, - "loss": 0.8266, - "step": 144 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.4939759036144583e-06, - "loss": 0.9024, - "step": 145 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.5180722891566266e-06, - "loss": 0.9315, - "step": 146 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.5421686746987956e-06, - "loss": 0.924, - "step": 147 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.5662650602409643e-06, - "loss": 0.8357, - "step": 148 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.5903614457831325e-06, - "loss": 1.003, - "step": 149 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.6144578313253016e-06, - "loss": 0.8439, - "step": 150 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.6385542168674703e-06, - "loss": 0.8326, - "step": 151 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.6626506024096385e-06, - "loss": 0.8099, - "step": 152 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.6867469879518076e-06, - "loss": 0.818, - "step": 153 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.7108433734939763e-06, - "loss": 0.7093, - "step": 154 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.7349397590361445e-06, - "loss": 0.9575, - "step": 155 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.7590361445783136e-06, - "loss": 0.8311, - "step": 156 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.7831325301204823e-06, - "loss": 0.7725, - "step": 157 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.807228915662651e-06, - "loss": 1.0212, - "step": 158 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.83132530120482e-06, - "loss": 0.8964, - "step": 159 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.855421686746989e-06, - "loss": 0.7592, - "step": 160 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.879518072289157e-06, - "loss": 0.8934, - "step": 161 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.903614457831326e-06, - "loss": 0.8698, - "step": 162 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.927710843373494e-06, - "loss": 0.8078, - "step": 163 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.9518072289156625e-06, - "loss": 0.7154, - "step": 164 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 3.975903614457832e-06, - "loss": 0.9779, - "step": 165 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.000000000000001e-06, - "loss": 0.8049, - "step": 166 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.024096385542169e-06, - "loss": 0.8288, - "step": 167 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.048192771084338e-06, - "loss": 0.8676, - "step": 168 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.072289156626506e-06, - "loss": 0.9173, - "step": 169 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.096385542168675e-06, - "loss": 0.9412, - "step": 170 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.1204819277108436e-06, - "loss": 0.8763, - "step": 171 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.144578313253013e-06, - "loss": 0.7531, - "step": 172 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.168674698795181e-06, - "loss": 0.7869, - "step": 173 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.19277108433735e-06, - "loss": 0.7799, - "step": 174 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.216867469879519e-06, - "loss": 0.7253, - "step": 175 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.240963855421687e-06, - "loss": 0.8264, - "step": 176 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.2650602409638555e-06, - "loss": 0.7303, - "step": 177 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.289156626506025e-06, - "loss": 0.7735, - "step": 178 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.313253012048193e-06, - "loss": 0.9495, - "step": 179 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.337349397590362e-06, - "loss": 0.8625, - "step": 180 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.361445783132531e-06, - "loss": 0.835, - "step": 181 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.385542168674699e-06, - "loss": 0.8373, - "step": 182 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.4096385542168675e-06, - "loss": 0.9069, - "step": 183 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.433734939759037e-06, - "loss": 0.8048, - "step": 184 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.457831325301205e-06, - "loss": 0.8948, - "step": 185 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.481927710843374e-06, - "loss": 0.8054, - "step": 186 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.506024096385542e-06, - "loss": 0.7441, - "step": 187 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.530120481927711e-06, - "loss": 0.8538, - "step": 188 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.55421686746988e-06, - "loss": 0.9052, - "step": 189 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.578313253012049e-06, - "loss": 0.8101, - "step": 190 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.602409638554217e-06, - "loss": 0.7953, - "step": 191 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.626506024096386e-06, - "loss": 0.7094, - "step": 192 - }, - { - "epoch": 0.03, - "grad_norm": 0.0, - "learning_rate": 4.650602409638554e-06, - "loss": 0.7708, - "step": 193 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.674698795180723e-06, - "loss": 0.643, - "step": 194 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.698795180722892e-06, - "loss": 0.902, - "step": 195 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.7228915662650606e-06, - "loss": 0.724, - "step": 196 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.74698795180723e-06, - "loss": 0.9544, - "step": 197 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.771084337349398e-06, - "loss": 0.8213, - "step": 198 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.795180722891566e-06, - "loss": 0.7385, - "step": 199 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.819277108433735e-06, - "loss": 0.7376, - "step": 200 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.843373493975904e-06, - "loss": 0.7602, - "step": 201 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.8674698795180725e-06, - "loss": 0.8151, - "step": 202 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.891566265060242e-06, - "loss": 0.852, - "step": 203 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.91566265060241e-06, - "loss": 0.7916, - "step": 204 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.939759036144578e-06, - "loss": 0.8066, - "step": 205 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.963855421686747e-06, - "loss": 0.7783, - "step": 206 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 4.987951807228916e-06, - "loss": 0.8647, - "step": 207 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.012048192771085e-06, - "loss": 0.8359, - "step": 208 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.036144578313254e-06, - "loss": 0.909, - "step": 209 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.060240963855422e-06, - "loss": 0.8486, - "step": 210 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.084337349397591e-06, - "loss": 0.7465, - "step": 211 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.108433734939759e-06, - "loss": 0.905, - "step": 212 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.132530120481927e-06, - "loss": 0.7312, - "step": 213 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.156626506024097e-06, - "loss": 0.6557, - "step": 214 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.180722891566266e-06, - "loss": 0.9171, - "step": 215 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.204819277108434e-06, - "loss": 0.9248, - "step": 216 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.228915662650603e-06, - "loss": 0.8044, - "step": 217 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.253012048192771e-06, - "loss": 0.9731, - "step": 218 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.27710843373494e-06, - "loss": 0.6523, - "step": 219 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.301204819277109e-06, - "loss": 0.805, - "step": 220 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.325301204819278e-06, - "loss": 0.9902, - "step": 221 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.349397590361446e-06, - "loss": 0.7854, - "step": 222 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.373493975903615e-06, - "loss": 0.9584, - "step": 223 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.397590361445784e-06, - "loss": 0.8672, - "step": 224 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.421686746987952e-06, - "loss": 0.7263, - "step": 225 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.445783132530121e-06, - "loss": 0.7679, - "step": 226 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.4698795180722896e-06, - "loss": 0.8068, - "step": 227 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.493975903614458e-06, - "loss": 0.7711, - "step": 228 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.518072289156628e-06, - "loss": 0.7754, - "step": 229 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.542168674698796e-06, - "loss": 0.8442, - "step": 230 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.566265060240964e-06, - "loss": 0.7627, - "step": 231 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.590361445783133e-06, - "loss": 0.8662, - "step": 232 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.6144578313253015e-06, - "loss": 0.9534, - "step": 233 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.63855421686747e-06, - "loss": 0.8102, - "step": 234 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.66265060240964e-06, - "loss": 0.9224, - "step": 235 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.686746987951808e-06, - "loss": 0.9174, - "step": 236 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.710843373493976e-06, - "loss": 0.7735, - "step": 237 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.734939759036145e-06, - "loss": 0.7251, - "step": 238 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.7590361445783135e-06, - "loss": 0.8644, - "step": 239 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.783132530120482e-06, - "loss": 0.8419, - "step": 240 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.807228915662652e-06, - "loss": 0.9772, - "step": 241 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.83132530120482e-06, - "loss": 0.9369, - "step": 242 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.855421686746988e-06, - "loss": 0.8601, - "step": 243 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.879518072289157e-06, - "loss": 0.9178, - "step": 244 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.9036144578313255e-06, - "loss": 0.8448, - "step": 245 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.927710843373495e-06, - "loss": 0.832, - "step": 246 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.951807228915664e-06, - "loss": 0.7202, - "step": 247 - }, - { - "epoch": 0.04, - "grad_norm": 0.0, - "learning_rate": 5.975903614457832e-06, - "loss": 0.7778, - "step": 248 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6e-06, - "loss": 0.7644, - "step": 249 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.02409638554217e-06, - "loss": 1.0002, - "step": 250 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.048192771084338e-06, - "loss": 0.8206, - "step": 251 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.0722891566265066e-06, - "loss": 0.8442, - "step": 252 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.096385542168676e-06, - "loss": 0.8246, - "step": 253 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.120481927710844e-06, - "loss": 0.711, - "step": 254 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.144578313253012e-06, - "loss": 0.9696, - "step": 255 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.168674698795182e-06, - "loss": 0.791, - "step": 256 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.19277108433735e-06, - "loss": 0.9107, - "step": 257 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.2168674698795185e-06, - "loss": 0.8154, - "step": 258 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.240963855421688e-06, - "loss": 0.8563, - "step": 259 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.265060240963856e-06, - "loss": 0.8976, - "step": 260 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.289156626506024e-06, - "loss": 0.8899, - "step": 261 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.313253012048192e-06, - "loss": 0.9071, - "step": 262 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.337349397590362e-06, - "loss": 0.8397, - "step": 263 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.3614457831325305e-06, - "loss": 0.8072, - "step": 264 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.385542168674699e-06, - "loss": 0.8384, - "step": 265 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.409638554216868e-06, - "loss": 0.8168, - "step": 266 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.433734939759036e-06, - "loss": 0.7904, - "step": 267 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.457831325301205e-06, - "loss": 0.9145, - "step": 268 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.481927710843374e-06, - "loss": 0.906, - "step": 269 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.5060240963855425e-06, - "loss": 0.9743, - "step": 270 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.530120481927711e-06, - "loss": 0.8336, - "step": 271 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.554216867469881e-06, - "loss": 0.7867, - "step": 272 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.578313253012049e-06, - "loss": 0.9085, - "step": 273 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.602409638554217e-06, - "loss": 0.9512, - "step": 274 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.626506024096386e-06, - "loss": 0.8147, - "step": 275 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.6506024096385545e-06, - "loss": 0.9192, - "step": 276 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.674698795180723e-06, - "loss": 0.8056, - "step": 277 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.698795180722893e-06, - "loss": 0.8183, - "step": 278 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.722891566265061e-06, - "loss": 0.6551, - "step": 279 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.746987951807229e-06, - "loss": 0.8226, - "step": 280 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.771084337349398e-06, - "loss": 0.7671, - "step": 281 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.7951807228915665e-06, - "loss": 0.8018, - "step": 282 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.819277108433735e-06, - "loss": 0.777, - "step": 283 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.843373493975905e-06, - "loss": 0.7876, - "step": 284 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.867469879518073e-06, - "loss": 0.9569, - "step": 285 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.891566265060241e-06, - "loss": 0.7928, - "step": 286 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.91566265060241e-06, - "loss": 0.7335, - "step": 287 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.9397590361445784e-06, - "loss": 0.8986, - "step": 288 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.963855421686747e-06, - "loss": 0.7613, - "step": 289 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 6.987951807228917e-06, - "loss": 0.9129, - "step": 290 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.012048192771085e-06, - "loss": 0.8022, - "step": 291 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.036144578313253e-06, - "loss": 0.9037, - "step": 292 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.060240963855422e-06, - "loss": 0.856, - "step": 293 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.084337349397591e-06, - "loss": 0.8098, - "step": 294 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.1084337349397595e-06, - "loss": 0.7808, - "step": 295 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.132530120481929e-06, - "loss": 0.9161, - "step": 296 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.156626506024097e-06, - "loss": 0.8992, - "step": 297 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.180722891566265e-06, - "loss": 0.8647, - "step": 298 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.204819277108435e-06, - "loss": 0.8763, - "step": 299 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.228915662650603e-06, - "loss": 0.8016, - "step": 300 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.2530120481927715e-06, - "loss": 0.6918, - "step": 301 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.277108433734941e-06, - "loss": 0.8553, - "step": 302 - }, - { - "epoch": 0.05, - "grad_norm": 0.0, - "learning_rate": 7.301204819277109e-06, - "loss": 0.9211, - "step": 303 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.325301204819277e-06, - "loss": 0.7945, - "step": 304 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.349397590361447e-06, - "loss": 0.8069, - "step": 305 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.373493975903615e-06, - "loss": 0.8427, - "step": 306 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.3975903614457835e-06, - "loss": 0.8196, - "step": 307 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.4216867469879526e-06, - "loss": 0.8111, - "step": 308 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.445783132530121e-06, - "loss": 0.9226, - "step": 309 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.469879518072289e-06, - "loss": 0.8641, - "step": 310 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.493975903614459e-06, - "loss": 0.7666, - "step": 311 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.518072289156627e-06, - "loss": 0.887, - "step": 312 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.5421686746987955e-06, - "loss": 0.8871, - "step": 313 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.5662650602409645e-06, - "loss": 0.9555, - "step": 314 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.590361445783133e-06, - "loss": 0.8899, - "step": 315 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.614457831325302e-06, - "loss": 0.7773, - "step": 316 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.638554216867471e-06, - "loss": 0.7677, - "step": 317 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.66265060240964e-06, - "loss": 0.9246, - "step": 318 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.686746987951807e-06, - "loss": 0.7841, - "step": 319 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.710843373493977e-06, - "loss": 0.8834, - "step": 320 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.734939759036146e-06, - "loss": 0.8319, - "step": 321 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.759036144578314e-06, - "loss": 0.8623, - "step": 322 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.783132530120484e-06, - "loss": 0.9175, - "step": 323 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.807228915662652e-06, - "loss": 0.8815, - "step": 324 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.83132530120482e-06, - "loss": 0.7834, - "step": 325 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.855421686746989e-06, - "loss": 0.896, - "step": 326 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.879518072289157e-06, - "loss": 0.7556, - "step": 327 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.903614457831325e-06, - "loss": 0.9991, - "step": 328 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.927710843373495e-06, - "loss": 0.8776, - "step": 329 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.951807228915663e-06, - "loss": 0.7811, - "step": 330 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 7.975903614457831e-06, - "loss": 0.7771, - "step": 331 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.000000000000001e-06, - "loss": 0.7885, - "step": 332 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.02409638554217e-06, - "loss": 0.9019, - "step": 333 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.048192771084338e-06, - "loss": 0.7934, - "step": 334 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.072289156626508e-06, - "loss": 0.7646, - "step": 335 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.096385542168676e-06, - "loss": 0.7786, - "step": 336 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.120481927710844e-06, - "loss": 0.7507, - "step": 337 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.144578313253012e-06, - "loss": 0.8141, - "step": 338 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.16867469879518e-06, - "loss": 0.8133, - "step": 339 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.19277108433735e-06, - "loss": 0.7704, - "step": 340 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.216867469879519e-06, - "loss": 0.9533, - "step": 341 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.240963855421687e-06, - "loss": 0.8547, - "step": 342 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.265060240963855e-06, - "loss": 0.7733, - "step": 343 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.289156626506025e-06, - "loss": 0.8643, - "step": 344 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.313253012048194e-06, - "loss": 0.8092, - "step": 345 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.337349397590362e-06, - "loss": 0.7129, - "step": 346 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.361445783132532e-06, - "loss": 0.8154, - "step": 347 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.3855421686747e-06, - "loss": 0.8315, - "step": 348 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.409638554216868e-06, - "loss": 0.9148, - "step": 349 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.433734939759038e-06, - "loss": 0.6938, - "step": 350 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.457831325301206e-06, - "loss": 0.893, - "step": 351 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.481927710843375e-06, - "loss": 0.7519, - "step": 352 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.506024096385543e-06, - "loss": 0.8877, - "step": 353 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.530120481927711e-06, - "loss": 0.8307, - "step": 354 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.55421686746988e-06, - "loss": 0.8591, - "step": 355 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.57831325301205e-06, - "loss": 0.7618, - "step": 356 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.602409638554217e-06, - "loss": 1.0092, - "step": 357 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.626506024096386e-06, - "loss": 0.7991, - "step": 358 - }, - { - "epoch": 0.06, - "grad_norm": 0.0, - "learning_rate": 8.650602409638556e-06, - "loss": 0.8579, - "step": 359 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.674698795180724e-06, - "loss": 0.862, - "step": 360 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.698795180722892e-06, - "loss": 0.7851, - "step": 361 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.722891566265062e-06, - "loss": 0.6351, - "step": 362 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.74698795180723e-06, - "loss": 0.7043, - "step": 363 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.771084337349399e-06, - "loss": 0.7474, - "step": 364 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.795180722891567e-06, - "loss": 0.7495, - "step": 365 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.819277108433735e-06, - "loss": 0.8007, - "step": 366 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.843373493975905e-06, - "loss": 0.7329, - "step": 367 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.867469879518073e-06, - "loss": 0.9663, - "step": 368 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.891566265060241e-06, - "loss": 0.8158, - "step": 369 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.91566265060241e-06, - "loss": 0.8685, - "step": 370 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.939759036144578e-06, - "loss": 0.8077, - "step": 371 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.963855421686748e-06, - "loss": 0.8005, - "step": 372 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 8.987951807228916e-06, - "loss": 0.822, - "step": 373 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.012048192771084e-06, - "loss": 0.7725, - "step": 374 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.036144578313254e-06, - "loss": 0.9238, - "step": 375 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.060240963855423e-06, - "loss": 0.912, - "step": 376 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.08433734939759e-06, - "loss": 0.7698, - "step": 377 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.10843373493976e-06, - "loss": 0.7995, - "step": 378 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.132530120481929e-06, - "loss": 0.8354, - "step": 379 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.156626506024097e-06, - "loss": 0.8932, - "step": 380 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.180722891566265e-06, - "loss": 0.8626, - "step": 381 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.204819277108434e-06, - "loss": 0.9256, - "step": 382 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.228915662650602e-06, - "loss": 0.7793, - "step": 383 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.253012048192772e-06, - "loss": 0.9396, - "step": 384 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.27710843373494e-06, - "loss": 0.8284, - "step": 385 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.301204819277108e-06, - "loss": 0.8863, - "step": 386 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.325301204819278e-06, - "loss": 0.9299, - "step": 387 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.349397590361446e-06, - "loss": 0.8654, - "step": 388 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.373493975903615e-06, - "loss": 0.8548, - "step": 389 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.397590361445785e-06, - "loss": 0.773, - "step": 390 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.421686746987953e-06, - "loss": 0.8016, - "step": 391 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.445783132530121e-06, - "loss": 0.7846, - "step": 392 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.46987951807229e-06, - "loss": 0.7898, - "step": 393 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.49397590361446e-06, - "loss": 0.8956, - "step": 394 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.518072289156628e-06, - "loss": 0.8172, - "step": 395 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.542168674698796e-06, - "loss": 0.7323, - "step": 396 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.566265060240964e-06, - "loss": 0.6804, - "step": 397 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.590361445783132e-06, - "loss": 0.8464, - "step": 398 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.614457831325302e-06, - "loss": 0.9424, - "step": 399 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.63855421686747e-06, - "loss": 0.7829, - "step": 400 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.662650602409639e-06, - "loss": 0.7774, - "step": 401 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.686746987951809e-06, - "loss": 0.8778, - "step": 402 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.710843373493977e-06, - "loss": 0.8387, - "step": 403 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.734939759036145e-06, - "loss": 0.9027, - "step": 404 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.759036144578315e-06, - "loss": 0.8493, - "step": 405 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.783132530120483e-06, - "loss": 0.834, - "step": 406 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.807228915662652e-06, - "loss": 0.8505, - "step": 407 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.83132530120482e-06, - "loss": 0.7629, - "step": 408 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.855421686746988e-06, - "loss": 0.7752, - "step": 409 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.879518072289156e-06, - "loss": 0.8088, - "step": 410 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.903614457831326e-06, - "loss": 0.8781, - "step": 411 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.927710843373494e-06, - "loss": 0.8034, - "step": 412 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.951807228915663e-06, - "loss": 0.841, - "step": 413 - }, - { - "epoch": 0.07, - "grad_norm": 0.0, - "learning_rate": 9.975903614457833e-06, - "loss": 0.764, - "step": 414 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1e-05, - "loss": 0.7326, - "step": 415 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.002409638554217e-05, - "loss": 0.9738, - "step": 416 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0048192771084337e-05, - "loss": 0.823, - "step": 417 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0072289156626507e-05, - "loss": 0.7473, - "step": 418 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0096385542168675e-05, - "loss": 0.8763, - "step": 419 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0120481927710844e-05, - "loss": 0.6983, - "step": 420 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0144578313253014e-05, - "loss": 0.7674, - "step": 421 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0168674698795182e-05, - "loss": 0.8069, - "step": 422 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.019277108433735e-05, - "loss": 0.7944, - "step": 423 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0216867469879518e-05, - "loss": 0.8562, - "step": 424 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0240963855421688e-05, - "loss": 0.7855, - "step": 425 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0265060240963855e-05, - "loss": 0.9263, - "step": 426 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0289156626506025e-05, - "loss": 0.8199, - "step": 427 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0313253012048195e-05, - "loss": 0.68, - "step": 428 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0337349397590361e-05, - "loss": 0.7954, - "step": 429 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0361445783132531e-05, - "loss": 0.8096, - "step": 430 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0385542168674701e-05, - "loss": 0.735, - "step": 431 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0409638554216868e-05, - "loss": 0.8425, - "step": 432 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0433734939759038e-05, - "loss": 0.8647, - "step": 433 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0457831325301206e-05, - "loss": 0.8559, - "step": 434 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0481927710843374e-05, - "loss": 0.7954, - "step": 435 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0506024096385542e-05, - "loss": 0.8175, - "step": 436 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0530120481927712e-05, - "loss": 0.7779, - "step": 437 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.055421686746988e-05, - "loss": 0.9166, - "step": 438 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0578313253012049e-05, - "loss": 0.7366, - "step": 439 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0602409638554219e-05, - "loss": 0.7725, - "step": 440 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0626506024096385e-05, - "loss": 0.7047, - "step": 441 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0650602409638555e-05, - "loss": 0.8203, - "step": 442 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0674698795180725e-05, - "loss": 0.9973, - "step": 443 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0698795180722892e-05, - "loss": 0.8032, - "step": 444 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0722891566265062e-05, - "loss": 0.8328, - "step": 445 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.074698795180723e-05, - "loss": 0.8814, - "step": 446 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0771084337349398e-05, - "loss": 1.1439, - "step": 447 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0795180722891568e-05, - "loss": 0.9001, - "step": 448 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0819277108433736e-05, - "loss": 0.7898, - "step": 449 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0843373493975904e-05, - "loss": 0.8579, - "step": 450 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0867469879518073e-05, - "loss": 0.742, - "step": 451 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0891566265060243e-05, - "loss": 0.9117, - "step": 452 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.091566265060241e-05, - "loss": 0.9672, - "step": 453 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0939759036144579e-05, - "loss": 0.8747, - "step": 454 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0963855421686749e-05, - "loss": 0.7581, - "step": 455 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.0987951807228916e-05, - "loss": 0.938, - "step": 456 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1012048192771086e-05, - "loss": 0.8619, - "step": 457 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1036144578313255e-05, - "loss": 0.788, - "step": 458 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1060240963855422e-05, - "loss": 0.9739, - "step": 459 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1084337349397592e-05, - "loss": 0.8631, - "step": 460 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.110843373493976e-05, - "loss": 0.7395, - "step": 461 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1132530120481928e-05, - "loss": 0.7728, - "step": 462 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1156626506024097e-05, - "loss": 0.7909, - "step": 463 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1180722891566267e-05, - "loss": 0.8688, - "step": 464 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1204819277108435e-05, - "loss": 0.7127, - "step": 465 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1228915662650603e-05, - "loss": 0.748, - "step": 466 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.1253012048192773e-05, - "loss": 0.848, - "step": 467 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.127710843373494e-05, - "loss": 0.7251, - "step": 468 - }, - { - "epoch": 0.08, - "grad_norm": 0.0, - "learning_rate": 1.130120481927711e-05, - "loss": 0.7837, - "step": 469 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.132530120481928e-05, - "loss": 0.7985, - "step": 470 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1349397590361446e-05, - "loss": 0.8118, - "step": 471 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1373493975903616e-05, - "loss": 0.8174, - "step": 472 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1397590361445786e-05, - "loss": 0.8072, - "step": 473 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1421686746987952e-05, - "loss": 0.7421, - "step": 474 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1445783132530122e-05, - "loss": 0.8696, - "step": 475 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.146987951807229e-05, - "loss": 0.865, - "step": 476 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1493975903614459e-05, - "loss": 0.8625, - "step": 477 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1518072289156627e-05, - "loss": 0.824, - "step": 478 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1542168674698797e-05, - "loss": 0.7323, - "step": 479 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1566265060240964e-05, - "loss": 0.724, - "step": 480 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1590361445783133e-05, - "loss": 0.8278, - "step": 481 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1614457831325303e-05, - "loss": 0.7231, - "step": 482 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.163855421686747e-05, - "loss": 0.803, - "step": 483 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.166265060240964e-05, - "loss": 0.83, - "step": 484 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.168674698795181e-05, - "loss": 0.7588, - "step": 485 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1710843373493976e-05, - "loss": 0.758, - "step": 486 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1734939759036146e-05, - "loss": 0.976, - "step": 487 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1759036144578315e-05, - "loss": 0.8134, - "step": 488 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1783132530120483e-05, - "loss": 0.8361, - "step": 489 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1807228915662651e-05, - "loss": 0.8264, - "step": 490 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1831325301204821e-05, - "loss": 0.8339, - "step": 491 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.185542168674699e-05, - "loss": 0.8261, - "step": 492 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1879518072289157e-05, - "loss": 0.7546, - "step": 493 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1903614457831327e-05, - "loss": 0.8127, - "step": 494 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1927710843373494e-05, - "loss": 0.815, - "step": 495 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1951807228915664e-05, - "loss": 0.9206, - "step": 496 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.1975903614457834e-05, - "loss": 0.8811, - "step": 497 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2e-05, - "loss": 0.8468, - "step": 498 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.202409638554217e-05, - "loss": 0.6974, - "step": 499 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.204819277108434e-05, - "loss": 0.8873, - "step": 500 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2072289156626507e-05, - "loss": 0.7594, - "step": 501 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2096385542168677e-05, - "loss": 0.8637, - "step": 502 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2120481927710845e-05, - "loss": 0.8354, - "step": 503 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2144578313253013e-05, - "loss": 0.8049, - "step": 504 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2168674698795181e-05, - "loss": 0.8649, - "step": 505 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2192771084337351e-05, - "loss": 0.8318, - "step": 506 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2216867469879518e-05, - "loss": 0.8798, - "step": 507 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2240963855421688e-05, - "loss": 0.7494, - "step": 508 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2265060240963858e-05, - "loss": 0.8543, - "step": 509 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2289156626506024e-05, - "loss": 0.7813, - "step": 510 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2313253012048194e-05, - "loss": 0.8966, - "step": 511 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2337349397590364e-05, - "loss": 0.799, - "step": 512 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.236144578313253e-05, - "loss": 0.7713, - "step": 513 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.23855421686747e-05, - "loss": 0.8882, - "step": 514 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2409638554216869e-05, - "loss": 0.8073, - "step": 515 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2433734939759037e-05, - "loss": 0.7985, - "step": 516 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2457831325301207e-05, - "loss": 0.8972, - "step": 517 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2481927710843375e-05, - "loss": 0.8098, - "step": 518 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2506024096385544e-05, - "loss": 0.8514, - "step": 519 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2530120481927712e-05, - "loss": 0.8342, - "step": 520 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.255421686746988e-05, - "loss": 0.7287, - "step": 521 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2578313253012048e-05, - "loss": 0.8387, - "step": 522 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2602409638554218e-05, - "loss": 0.7817, - "step": 523 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2626506024096385e-05, - "loss": 0.8596, - "step": 524 - }, - { - "epoch": 0.09, - "grad_norm": 0.0, - "learning_rate": 1.2650602409638555e-05, - "loss": 0.7317, - "step": 525 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2674698795180725e-05, - "loss": 0.8183, - "step": 526 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2698795180722891e-05, - "loss": 0.9065, - "step": 527 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2722891566265061e-05, - "loss": 0.8382, - "step": 528 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2746987951807231e-05, - "loss": 0.8263, - "step": 529 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2771084337349398e-05, - "loss": 0.7687, - "step": 530 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2795180722891567e-05, - "loss": 0.9061, - "step": 531 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2819277108433736e-05, - "loss": 0.8361, - "step": 532 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2843373493975904e-05, - "loss": 0.8466, - "step": 533 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2867469879518072e-05, - "loss": 0.7801, - "step": 534 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2891566265060242e-05, - "loss": 0.813, - "step": 535 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.291566265060241e-05, - "loss": 0.7783, - "step": 536 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2939759036144579e-05, - "loss": 0.708, - "step": 537 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2963855421686749e-05, - "loss": 0.7896, - "step": 538 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.2987951807228915e-05, - "loss": 0.7826, - "step": 539 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3012048192771085e-05, - "loss": 0.9017, - "step": 540 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3036144578313255e-05, - "loss": 0.7733, - "step": 541 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3060240963855421e-05, - "loss": 0.8571, - "step": 542 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3084337349397591e-05, - "loss": 0.8613, - "step": 543 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3108433734939761e-05, - "loss": 0.7513, - "step": 544 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3132530120481928e-05, - "loss": 0.7735, - "step": 545 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3156626506024098e-05, - "loss": 0.7764, - "step": 546 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3180722891566266e-05, - "loss": 0.8613, - "step": 547 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3204819277108434e-05, - "loss": 0.8202, - "step": 548 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3228915662650603e-05, - "loss": 0.8146, - "step": 549 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3253012048192772e-05, - "loss": 0.8372, - "step": 550 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3277108433734939e-05, - "loss": 0.8563, - "step": 551 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3301204819277109e-05, - "loss": 0.782, - "step": 552 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3325301204819279e-05, - "loss": 0.8419, - "step": 553 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3349397590361445e-05, - "loss": 0.7429, - "step": 554 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3373493975903615e-05, - "loss": 0.8798, - "step": 555 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3397590361445785e-05, - "loss": 0.8264, - "step": 556 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3421686746987952e-05, - "loss": 0.8249, - "step": 557 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3445783132530122e-05, - "loss": 0.8722, - "step": 558 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.346987951807229e-05, - "loss": 0.7661, - "step": 559 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3493975903614458e-05, - "loss": 0.9545, - "step": 560 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3518072289156628e-05, - "loss": 0.6789, - "step": 561 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3542168674698796e-05, - "loss": 0.7492, - "step": 562 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3566265060240965e-05, - "loss": 0.9422, - "step": 563 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3590361445783133e-05, - "loss": 0.9099, - "step": 564 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3614457831325303e-05, - "loss": 0.8273, - "step": 565 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.363855421686747e-05, - "loss": 1.0561, - "step": 566 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.366265060240964e-05, - "loss": 0.866, - "step": 567 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.368674698795181e-05, - "loss": 0.9599, - "step": 568 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3710843373493976e-05, - "loss": 0.8213, - "step": 569 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3734939759036146e-05, - "loss": 0.8751, - "step": 570 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3759036144578316e-05, - "loss": 0.7742, - "step": 571 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3783132530120482e-05, - "loss": 0.8114, - "step": 572 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3807228915662652e-05, - "loss": 0.6378, - "step": 573 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.383132530120482e-05, - "loss": 0.8486, - "step": 574 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3855421686746989e-05, - "loss": 0.8564, - "step": 575 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3879518072289157e-05, - "loss": 0.8903, - "step": 576 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3903614457831327e-05, - "loss": 0.8535, - "step": 577 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3927710843373493e-05, - "loss": 0.8234, - "step": 578 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3951807228915663e-05, - "loss": 0.7896, - "step": 579 - }, - { - "epoch": 0.1, - "grad_norm": 0.0, - "learning_rate": 1.3975903614457833e-05, - "loss": 0.9158, - "step": 580 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4e-05, - "loss": 0.7806, - "step": 581 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.402409638554217e-05, - "loss": 0.943, - "step": 582 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.404819277108434e-05, - "loss": 0.8603, - "step": 583 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4072289156626506e-05, - "loss": 0.8199, - "step": 584 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4096385542168676e-05, - "loss": 0.8777, - "step": 585 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4120481927710844e-05, - "loss": 0.8295, - "step": 586 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4144578313253013e-05, - "loss": 0.8327, - "step": 587 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4168674698795183e-05, - "loss": 0.9018, - "step": 588 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.419277108433735e-05, - "loss": 0.8383, - "step": 589 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4216867469879519e-05, - "loss": 0.7568, - "step": 590 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4240963855421687e-05, - "loss": 0.8691, - "step": 591 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4265060240963857e-05, - "loss": 0.7241, - "step": 592 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4289156626506024e-05, - "loss": 0.832, - "step": 593 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4313253012048194e-05, - "loss": 0.9047, - "step": 594 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4337349397590364e-05, - "loss": 0.8891, - "step": 595 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.436144578313253e-05, - "loss": 0.8357, - "step": 596 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.43855421686747e-05, - "loss": 0.655, - "step": 597 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.440963855421687e-05, - "loss": 0.8191, - "step": 598 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4433734939759037e-05, - "loss": 0.713, - "step": 599 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4457831325301207e-05, - "loss": 0.8727, - "step": 600 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4481927710843375e-05, - "loss": 0.79, - "step": 601 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4506024096385543e-05, - "loss": 0.7339, - "step": 602 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4530120481927711e-05, - "loss": 0.9656, - "step": 603 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4554216867469881e-05, - "loss": 0.7978, - "step": 604 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.457831325301205e-05, - "loss": 0.6527, - "step": 605 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4602409638554218e-05, - "loss": 0.7831, - "step": 606 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4626506024096388e-05, - "loss": 0.9452, - "step": 607 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4650602409638554e-05, - "loss": 0.7504, - "step": 608 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4674698795180724e-05, - "loss": 0.6439, - "step": 609 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4698795180722894e-05, - "loss": 0.8918, - "step": 610 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.472289156626506e-05, - "loss": 0.9428, - "step": 611 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.474698795180723e-05, - "loss": 0.714, - "step": 612 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4771084337349399e-05, - "loss": 0.8169, - "step": 613 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4795180722891567e-05, - "loss": 0.943, - "step": 614 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4819277108433737e-05, - "loss": 0.7937, - "step": 615 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4843373493975905e-05, - "loss": 0.7377, - "step": 616 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4867469879518073e-05, - "loss": 0.9528, - "step": 617 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4891566265060242e-05, - "loss": 0.7262, - "step": 618 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4915662650602412e-05, - "loss": 0.7822, - "step": 619 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4939759036144578e-05, - "loss": 0.8091, - "step": 620 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4963855421686748e-05, - "loss": 0.8363, - "step": 621 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.4987951807228918e-05, - "loss": 0.8946, - "step": 622 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5012048192771084e-05, - "loss": 0.7689, - "step": 623 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5036144578313254e-05, - "loss": 0.7415, - "step": 624 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5060240963855424e-05, - "loss": 0.8795, - "step": 625 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5084337349397591e-05, - "loss": 0.764, - "step": 626 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5108433734939761e-05, - "loss": 0.825, - "step": 627 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5132530120481929e-05, - "loss": 0.8379, - "step": 628 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5156626506024097e-05, - "loss": 0.9583, - "step": 629 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5180722891566266e-05, - "loss": 0.9744, - "step": 630 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5204819277108436e-05, - "loss": 0.8933, - "step": 631 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5228915662650604e-05, - "loss": 0.7707, - "step": 632 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5253012048192772e-05, - "loss": 0.8753, - "step": 633 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.5277108433734942e-05, - "loss": 0.7738, - "step": 634 - }, - { - "epoch": 0.11, - "grad_norm": 0.0, - "learning_rate": 1.530120481927711e-05, - "loss": 0.6884, - "step": 635 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.532530120481928e-05, - "loss": 0.9115, - "step": 636 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5349397590361447e-05, - "loss": 0.9503, - "step": 637 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5373493975903615e-05, - "loss": 0.831, - "step": 638 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5397590361445783e-05, - "loss": 0.7401, - "step": 639 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5421686746987955e-05, - "loss": 0.7918, - "step": 640 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.544578313253012e-05, - "loss": 0.7213, - "step": 641 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.546987951807229e-05, - "loss": 1.023, - "step": 642 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.549397590361446e-05, - "loss": 0.7501, - "step": 643 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5518072289156628e-05, - "loss": 0.8877, - "step": 644 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5542168674698796e-05, - "loss": 0.8311, - "step": 645 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5566265060240968e-05, - "loss": 0.8448, - "step": 646 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5590361445783132e-05, - "loss": 0.9539, - "step": 647 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5614457831325304e-05, - "loss": 0.7503, - "step": 648 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5638554216867472e-05, - "loss": 0.9405, - "step": 649 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.566265060240964e-05, - "loss": 0.7722, - "step": 650 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.568674698795181e-05, - "loss": 0.734, - "step": 651 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5710843373493977e-05, - "loss": 1.0274, - "step": 652 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5734939759036145e-05, - "loss": 0.7991, - "step": 653 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5759036144578313e-05, - "loss": 0.8659, - "step": 654 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5783132530120485e-05, - "loss": 0.9124, - "step": 655 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.580722891566265e-05, - "loss": 0.8983, - "step": 656 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.583132530120482e-05, - "loss": 0.7796, - "step": 657 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.585542168674699e-05, - "loss": 0.8663, - "step": 658 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5879518072289158e-05, - "loss": 0.897, - "step": 659 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5903614457831326e-05, - "loss": 0.7651, - "step": 660 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5927710843373495e-05, - "loss": 0.9907, - "step": 661 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5951807228915663e-05, - "loss": 1.0062, - "step": 662 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.5975903614457834e-05, - "loss": 0.8273, - "step": 663 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.7483, - "step": 664 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.602409638554217e-05, - "loss": 0.9363, - "step": 665 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.604819277108434e-05, - "loss": 0.8067, - "step": 666 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6072289156626507e-05, - "loss": 0.9331, - "step": 667 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6096385542168676e-05, - "loss": 0.8712, - "step": 668 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6120481927710844e-05, - "loss": 0.7974, - "step": 669 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6144578313253015e-05, - "loss": 0.8088, - "step": 670 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.616867469879518e-05, - "loss": 0.9242, - "step": 671 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6192771084337352e-05, - "loss": 0.8058, - "step": 672 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.621686746987952e-05, - "loss": 0.8111, - "step": 673 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.624096385542169e-05, - "loss": 0.8442, - "step": 674 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6265060240963857e-05, - "loss": 0.8296, - "step": 675 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6289156626506025e-05, - "loss": 0.877, - "step": 676 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6313253012048193e-05, - "loss": 0.8806, - "step": 677 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.633734939759036e-05, - "loss": 0.9548, - "step": 678 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6361445783132533e-05, - "loss": 0.8017, - "step": 679 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.63855421686747e-05, - "loss": 0.918, - "step": 680 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.640963855421687e-05, - "loss": 0.839, - "step": 681 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6433734939759038e-05, - "loss": 0.8135, - "step": 682 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6457831325301206e-05, - "loss": 1.0431, - "step": 683 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6481927710843374e-05, - "loss": 0.6241, - "step": 684 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6506024096385546e-05, - "loss": 0.9869, - "step": 685 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.653012048192771e-05, - "loss": 0.8146, - "step": 686 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6554216867469882e-05, - "loss": 0.7295, - "step": 687 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.657831325301205e-05, - "loss": 0.7839, - "step": 688 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.660240963855422e-05, - "loss": 0.8223, - "step": 689 - }, - { - "epoch": 0.12, - "grad_norm": 0.0, - "learning_rate": 1.6626506024096387e-05, - "loss": 0.932, - "step": 690 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6650602409638555e-05, - "loss": 0.9647, - "step": 691 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6674698795180724e-05, - "loss": 0.7324, - "step": 692 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6698795180722892e-05, - "loss": 0.7447, - "step": 693 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6722891566265063e-05, - "loss": 0.9523, - "step": 694 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6746987951807228e-05, - "loss": 0.8984, - "step": 695 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.67710843373494e-05, - "loss": 0.9026, - "step": 696 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6795180722891568e-05, - "loss": 0.7746, - "step": 697 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6819277108433736e-05, - "loss": 0.8494, - "step": 698 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6843373493975905e-05, - "loss": 0.8838, - "step": 699 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6867469879518076e-05, - "loss": 0.8216, - "step": 700 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.689156626506024e-05, - "loss": 0.7176, - "step": 701 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6915662650602413e-05, - "loss": 0.8574, - "step": 702 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.693975903614458e-05, - "loss": 0.8059, - "step": 703 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.696385542168675e-05, - "loss": 0.7631, - "step": 704 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.6987951807228917e-05, - "loss": 0.7392, - "step": 705 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7012048192771086e-05, - "loss": 0.7746, - "step": 706 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7036144578313254e-05, - "loss": 0.813, - "step": 707 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7060240963855422e-05, - "loss": 0.9697, - "step": 708 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7084337349397594e-05, - "loss": 0.8612, - "step": 709 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.710843373493976e-05, - "loss": 0.6941, - "step": 710 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.713253012048193e-05, - "loss": 0.8966, - "step": 711 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.71566265060241e-05, - "loss": 0.8825, - "step": 712 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7180722891566267e-05, - "loss": 0.9562, - "step": 713 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7204819277108435e-05, - "loss": 0.8267, - "step": 714 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7228915662650603e-05, - "loss": 0.9178, - "step": 715 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.725301204819277e-05, - "loss": 0.9027, - "step": 716 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7277108433734943e-05, - "loss": 1.0268, - "step": 717 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.730120481927711e-05, - "loss": 0.7622, - "step": 718 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.732530120481928e-05, - "loss": 0.9937, - "step": 719 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7349397590361448e-05, - "loss": 0.7412, - "step": 720 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7373493975903616e-05, - "loss": 0.9304, - "step": 721 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7397590361445784e-05, - "loss": 0.93, - "step": 722 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7421686746987953e-05, - "loss": 0.8848, - "step": 723 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7445783132530124e-05, - "loss": 0.7673, - "step": 724 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.746987951807229e-05, - "loss": 0.8055, - "step": 725 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.749397590361446e-05, - "loss": 0.9333, - "step": 726 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7518072289156625e-05, - "loss": 0.797, - "step": 727 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7542168674698797e-05, - "loss": 0.8106, - "step": 728 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7566265060240965e-05, - "loss": 0.8243, - "step": 729 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7590361445783134e-05, - "loss": 0.9452, - "step": 730 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7614457831325302e-05, - "loss": 0.881, - "step": 731 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.763855421686747e-05, - "loss": 0.7621, - "step": 732 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.766265060240964e-05, - "loss": 0.7706, - "step": 733 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.768674698795181e-05, - "loss": 0.9526, - "step": 734 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7710843373493978e-05, - "loss": 0.9508, - "step": 735 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7734939759036146e-05, - "loss": 0.7753, - "step": 736 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7759036144578315e-05, - "loss": 0.9134, - "step": 737 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7783132530120483e-05, - "loss": 0.9427, - "step": 738 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.780722891566265e-05, - "loss": 0.8635, - "step": 739 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.783132530120482e-05, - "loss": 0.81, - "step": 740 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.785542168674699e-05, - "loss": 0.8238, - "step": 741 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7879518072289156e-05, - "loss": 0.6559, - "step": 742 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7903614457831327e-05, - "loss": 0.9639, - "step": 743 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7927710843373496e-05, - "loss": 0.8653, - "step": 744 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7951807228915664e-05, - "loss": 0.8539, - "step": 745 - }, - { - "epoch": 0.13, - "grad_norm": 0.0, - "learning_rate": 1.7975903614457832e-05, - "loss": 0.8866, - "step": 746 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8e-05, - "loss": 0.7661, - "step": 747 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.802409638554217e-05, - "loss": 0.8454, - "step": 748 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8048192771084337e-05, - "loss": 0.7512, - "step": 749 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.807228915662651e-05, - "loss": 0.8798, - "step": 750 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8096385542168677e-05, - "loss": 0.7057, - "step": 751 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8120481927710845e-05, - "loss": 0.7692, - "step": 752 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8144578313253013e-05, - "loss": 0.8735, - "step": 753 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.816867469879518e-05, - "loss": 0.7562, - "step": 754 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.819277108433735e-05, - "loss": 0.9751, - "step": 755 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.821686746987952e-05, - "loss": 0.7794, - "step": 756 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8240963855421686e-05, - "loss": 0.8734, - "step": 757 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8265060240963858e-05, - "loss": 0.7236, - "step": 758 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8289156626506026e-05, - "loss": 0.8797, - "step": 759 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8313253012048194e-05, - "loss": 0.9278, - "step": 760 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8337349397590363e-05, - "loss": 0.9905, - "step": 761 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.836144578313253e-05, - "loss": 0.909, - "step": 762 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.83855421686747e-05, - "loss": 0.7593, - "step": 763 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8409638554216867e-05, - "loss": 0.818, - "step": 764 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.843373493975904e-05, - "loss": 0.8468, - "step": 765 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8457831325301204e-05, - "loss": 0.7958, - "step": 766 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8481927710843375e-05, - "loss": 0.7064, - "step": 767 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8506024096385544e-05, - "loss": 0.8483, - "step": 768 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8530120481927712e-05, - "loss": 0.7095, - "step": 769 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.855421686746988e-05, - "loss": 0.7807, - "step": 770 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8578313253012052e-05, - "loss": 0.8966, - "step": 771 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8602409638554217e-05, - "loss": 0.7545, - "step": 772 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8626506024096388e-05, - "loss": 0.7627, - "step": 773 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8650602409638556e-05, - "loss": 0.9973, - "step": 774 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8674698795180725e-05, - "loss": 0.8538, - "step": 775 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8698795180722893e-05, - "loss": 0.8839, - "step": 776 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.872289156626506e-05, - "loss": 0.8812, - "step": 777 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.874698795180723e-05, - "loss": 0.7958, - "step": 778 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8771084337349398e-05, - "loss": 0.8466, - "step": 779 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.879518072289157e-05, - "loss": 0.9, - "step": 780 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8819277108433734e-05, - "loss": 0.8755, - "step": 781 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8843373493975906e-05, - "loss": 0.7816, - "step": 782 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8867469879518074e-05, - "loss": 0.7936, - "step": 783 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8891566265060242e-05, - "loss": 0.8047, - "step": 784 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.891566265060241e-05, - "loss": 0.8619, - "step": 785 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.893975903614458e-05, - "loss": 0.8479, - "step": 786 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.8963855421686747e-05, - "loss": 0.8312, - "step": 787 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.898795180722892e-05, - "loss": 0.825, - "step": 788 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9012048192771087e-05, - "loss": 0.8654, - "step": 789 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9036144578313255e-05, - "loss": 0.8189, - "step": 790 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9060240963855423e-05, - "loss": 0.8135, - "step": 791 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.908433734939759e-05, - "loss": 0.7608, - "step": 792 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.910843373493976e-05, - "loss": 0.8347, - "step": 793 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9132530120481928e-05, - "loss": 0.7895, - "step": 794 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.91566265060241e-05, - "loss": 0.7617, - "step": 795 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9180722891566265e-05, - "loss": 0.7018, - "step": 796 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9204819277108436e-05, - "loss": 0.8941, - "step": 797 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9228915662650604e-05, - "loss": 0.8079, - "step": 798 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.9253012048192773e-05, - "loss": 0.9282, - "step": 799 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.927710843373494e-05, - "loss": 0.7699, - "step": 800 - }, - { - "epoch": 0.14, - "grad_norm": 0.0, - "learning_rate": 1.930120481927711e-05, - "loss": 0.8937, - "step": 801 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9325301204819277e-05, - "loss": 0.7714, - "step": 802 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9349397590361446e-05, - "loss": 0.9018, - "step": 803 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9373493975903617e-05, - "loss": 0.7864, - "step": 804 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9397590361445785e-05, - "loss": 0.7635, - "step": 805 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9421686746987954e-05, - "loss": 0.6852, - "step": 806 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9445783132530122e-05, - "loss": 0.7817, - "step": 807 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.946987951807229e-05, - "loss": 0.7987, - "step": 808 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.949397590361446e-05, - "loss": 0.8551, - "step": 809 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.951807228915663e-05, - "loss": 0.834, - "step": 810 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9542168674698795e-05, - "loss": 0.7426, - "step": 811 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9566265060240967e-05, - "loss": 0.8321, - "step": 812 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9590361445783135e-05, - "loss": 0.8372, - "step": 813 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9614457831325303e-05, - "loss": 0.8228, - "step": 814 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.963855421686747e-05, - "loss": 0.7762, - "step": 815 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.966265060240964e-05, - "loss": 0.7392, - "step": 816 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9686746987951808e-05, - "loss": 0.8179, - "step": 817 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9710843373493976e-05, - "loss": 0.7442, - "step": 818 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9734939759036148e-05, - "loss": 0.8081, - "step": 819 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9759036144578312e-05, - "loss": 0.7437, - "step": 820 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9783132530120484e-05, - "loss": 0.9125, - "step": 821 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9807228915662652e-05, - "loss": 0.755, - "step": 822 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.983132530120482e-05, - "loss": 0.9175, - "step": 823 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.985542168674699e-05, - "loss": 0.7958, - "step": 824 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.987951807228916e-05, - "loss": 0.8267, - "step": 825 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9903614457831325e-05, - "loss": 0.9907, - "step": 826 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9927710843373497e-05, - "loss": 0.8388, - "step": 827 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9951807228915665e-05, - "loss": 0.8516, - "step": 828 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9975903614457833e-05, - "loss": 0.7344, - "step": 829 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 2e-05, - "loss": 0.8846, - "step": 830 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999999801091438e-05, - "loss": 0.7368, - "step": 831 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999999920436575e-05, - "loss": 0.844, - "step": 832 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999999820982296e-05, - "loss": 0.8631, - "step": 833 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999996817463112e-05, - "loss": 0.9843, - "step": 834 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999999502728626e-05, - "loss": 0.8708, - "step": 835 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999992839292475e-05, - "loss": 0.8354, - "step": 836 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999990253481844e-05, - "loss": 0.8061, - "step": 837 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999987269854468e-05, - "loss": 0.945, - "step": 838 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999998388841047e-05, - "loss": 0.7674, - "step": 839 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999998010914998e-05, - "loss": 0.9892, - "step": 840 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999975932073154e-05, - "loss": 0.8944, - "step": 841 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999971357180152e-05, - "loss": 0.8043, - "step": 842 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999966384471157e-05, - "loss": 0.8742, - "step": 843 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999961013946372e-05, - "loss": 0.8126, - "step": 844 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999955245606004e-05, - "loss": 0.798, - "step": 845 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999949079450287e-05, - "loss": 0.7812, - "step": 846 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999942515479464e-05, - "loss": 0.7992, - "step": 847 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999935553693796e-05, - "loss": 0.7882, - "step": 848 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999928194093563e-05, - "loss": 1.1063, - "step": 849 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999920436679053e-05, - "loss": 0.9364, - "step": 850 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999912281450577e-05, - "loss": 0.9001, - "step": 851 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999903728408463e-05, - "loss": 1.0011, - "step": 852 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999894777553045e-05, - "loss": 0.8078, - "step": 853 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999885428884684e-05, - "loss": 0.7155, - "step": 854 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.999987568240375e-05, - "loss": 0.7858, - "step": 855 - }, - { - "epoch": 0.15, - "grad_norm": 0.0, - "learning_rate": 1.9999865538110628e-05, - "loss": 0.8367, - "step": 856 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999854996005726e-05, - "loss": 0.7991, - "step": 857 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999844056089463e-05, - "loss": 0.7343, - "step": 858 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999983271836227e-05, - "loss": 0.9236, - "step": 859 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999820982824603e-05, - "loss": 0.844, - "step": 860 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999808849476925e-05, - "loss": 0.965, - "step": 861 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999979631831972e-05, - "loss": 0.7785, - "step": 862 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999783389353488e-05, - "loss": 0.9395, - "step": 863 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999770062578742e-05, - "loss": 0.8392, - "step": 864 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999756337996013e-05, - "loss": 0.755, - "step": 865 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999742215605846e-05, - "loss": 0.8624, - "step": 866 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999727695408803e-05, - "loss": 0.8459, - "step": 867 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999712777405464e-05, - "loss": 0.7931, - "step": 868 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999697461596415e-05, - "loss": 0.7829, - "step": 869 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999681747982272e-05, - "loss": 0.8278, - "step": 870 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999966563656366e-05, - "loss": 0.9334, - "step": 871 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999964912734122e-05, - "loss": 0.8178, - "step": 872 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999632220315606e-05, - "loss": 0.7862, - "step": 873 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999614915487493e-05, - "loss": 0.7956, - "step": 874 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999597212857566e-05, - "loss": 0.9177, - "step": 875 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999579112426534e-05, - "loss": 0.8297, - "step": 876 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999560614195114e-05, - "loss": 0.8482, - "step": 877 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999541718164043e-05, - "loss": 0.7799, - "step": 878 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999952242433407e-05, - "loss": 0.9025, - "step": 879 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999502732705965e-05, - "loss": 0.8762, - "step": 880 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999482643280515e-05, - "loss": 0.9131, - "step": 881 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999462156058512e-05, - "loss": 0.862, - "step": 882 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999441271040774e-05, - "loss": 0.8278, - "step": 883 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999419988228134e-05, - "loss": 0.8303, - "step": 884 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999398307621436e-05, - "loss": 0.8759, - "step": 885 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999376229221547e-05, - "loss": 0.6499, - "step": 886 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999353753029334e-05, - "loss": 0.8114, - "step": 887 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999330879045706e-05, - "loss": 0.8901, - "step": 888 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999307607271567e-05, - "loss": 0.8193, - "step": 889 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999283937707835e-05, - "loss": 0.8061, - "step": 890 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999259870355462e-05, - "loss": 0.6911, - "step": 891 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.99992354052154e-05, - "loss": 0.8335, - "step": 892 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999210542288627e-05, - "loss": 0.8642, - "step": 893 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999185281576126e-05, - "loss": 0.8829, - "step": 894 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999159623078904e-05, - "loss": 0.8012, - "step": 895 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999133566797985e-05, - "loss": 0.8873, - "step": 896 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999107112734402e-05, - "loss": 0.7672, - "step": 897 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999908026088921e-05, - "loss": 0.8314, - "step": 898 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9999053011263473e-05, - "loss": 0.8244, - "step": 899 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999902536385828e-05, - "loss": 0.9776, - "step": 900 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999899731867473e-05, - "loss": 0.7481, - "step": 901 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998968875713935e-05, - "loss": 0.8499, - "step": 902 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999894003497703e-05, - "loss": 1.0103, - "step": 903 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999891079646516e-05, - "loss": 0.9517, - "step": 904 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998881160179494e-05, - "loss": 0.7498, - "step": 905 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998851126121205e-05, - "loss": 0.9382, - "step": 906 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998820694291487e-05, - "loss": 0.8338, - "step": 907 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998789864691554e-05, - "loss": 0.7125, - "step": 908 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.999875863732263e-05, - "loss": 0.8249, - "step": 909 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.9998727012185957e-05, - "loss": 0.9107, - "step": 910 - }, - { - "epoch": 0.16, - "grad_norm": 0.0, - "learning_rate": 1.99986949892828e-05, - "loss": 0.9819, - "step": 911 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998662568614425e-05, - "loss": 0.9957, - "step": 912 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999862975018212e-05, - "loss": 0.9113, - "step": 913 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.99985965339872e-05, - "loss": 0.7648, - "step": 914 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998562920030983e-05, - "loss": 0.8056, - "step": 915 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.99985289083148e-05, - "loss": 0.8912, - "step": 916 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998494498840012e-05, - "loss": 0.9263, - "step": 917 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999845969160798e-05, - "loss": 0.7704, - "step": 918 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998424486620097e-05, - "loss": 0.8908, - "step": 919 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999838888387776e-05, - "loss": 0.9376, - "step": 920 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999835288338238e-05, - "loss": 0.857, - "step": 921 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998316485135398e-05, - "loss": 0.828, - "step": 922 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999827968913826e-05, - "loss": 0.8733, - "step": 923 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998242495392426e-05, - "loss": 0.9131, - "step": 924 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998204903899377e-05, - "loss": 0.878, - "step": 925 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999816691466061e-05, - "loss": 0.8718, - "step": 926 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998128527677634e-05, - "loss": 0.7844, - "step": 927 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999808974295198e-05, - "loss": 0.975, - "step": 928 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998050560485185e-05, - "loss": 0.7996, - "step": 929 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9998010980278814e-05, - "loss": 0.9902, - "step": 930 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997971002334434e-05, - "loss": 0.8519, - "step": 931 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997930626653646e-05, - "loss": 0.7481, - "step": 932 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997889853238047e-05, - "loss": 0.8675, - "step": 933 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997848682089264e-05, - "loss": 0.7628, - "step": 934 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997807113208934e-05, - "loss": 0.7466, - "step": 935 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997765146598707e-05, - "loss": 0.8771, - "step": 936 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997722782260257e-05, - "loss": 0.8563, - "step": 937 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997680020195266e-05, - "loss": 0.841, - "step": 938 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997636860405437e-05, - "loss": 0.7906, - "step": 939 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997593302892488e-05, - "loss": 0.7499, - "step": 940 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997549347658148e-05, - "loss": 0.8883, - "step": 941 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997504994704174e-05, - "loss": 0.8261, - "step": 942 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999746024403232e-05, - "loss": 0.8681, - "step": 943 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997415095644372e-05, - "loss": 0.8812, - "step": 944 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997369549542126e-05, - "loss": 0.7449, - "step": 945 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999732360572739e-05, - "loss": 0.9578, - "step": 946 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997277264201997e-05, - "loss": 0.8232, - "step": 947 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999723052496779e-05, - "loss": 1.0557, - "step": 948 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997183388026622e-05, - "loss": 0.85, - "step": 949 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997135853380376e-05, - "loss": 0.8929, - "step": 950 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997087921030935e-05, - "loss": 0.7833, - "step": 951 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9997039590980215e-05, - "loss": 0.9816, - "step": 952 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996990863230136e-05, - "loss": 0.8796, - "step": 953 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996941737782628e-05, - "loss": 0.7963, - "step": 954 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999689221463966e-05, - "loss": 0.7593, - "step": 955 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996842293803186e-05, - "loss": 0.9257, - "step": 956 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996791975275206e-05, - "loss": 0.8378, - "step": 957 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996741259057715e-05, - "loss": 0.8478, - "step": 958 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999669014515273e-05, - "loss": 0.7481, - "step": 959 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996638633562286e-05, - "loss": 0.7181, - "step": 960 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996586724288433e-05, - "loss": 0.9367, - "step": 961 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996534417333232e-05, - "loss": 0.7463, - "step": 962 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999648171269877e-05, - "loss": 0.7964, - "step": 963 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996428610387138e-05, - "loss": 0.9307, - "step": 964 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999637511040045e-05, - "loss": 0.8208, - "step": 965 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.999632121274084e-05, - "loss": 0.8282, - "step": 966 - }, - { - "epoch": 0.17, - "grad_norm": 0.0, - "learning_rate": 1.9996266917410442e-05, - "loss": 0.8161, - "step": 967 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9996212224411428e-05, - "loss": 0.8516, - "step": 968 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9996157133745963e-05, - "loss": 0.7933, - "step": 969 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9996101645416242e-05, - "loss": 0.9104, - "step": 970 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9996045759424475e-05, - "loss": 0.8157, - "step": 971 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995989475772887e-05, - "loss": 0.7584, - "step": 972 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995932794463704e-05, - "loss": 0.8376, - "step": 973 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.99958757154992e-05, - "loss": 0.8847, - "step": 974 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995818238881633e-05, - "loss": 0.8817, - "step": 975 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995760364613294e-05, - "loss": 0.8086, - "step": 976 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999570209269648e-05, - "loss": 0.9217, - "step": 977 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995643423133516e-05, - "loss": 0.7955, - "step": 978 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995584355926733e-05, - "loss": 0.8831, - "step": 979 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995524891078482e-05, - "loss": 0.7532, - "step": 980 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995465028591122e-05, - "loss": 0.9325, - "step": 981 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995404768467046e-05, - "loss": 0.7924, - "step": 982 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995344110708645e-05, - "loss": 0.8451, - "step": 983 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999528305531833e-05, - "loss": 0.8435, - "step": 984 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995221602298534e-05, - "loss": 0.8719, - "step": 985 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995159751651697e-05, - "loss": 0.7287, - "step": 986 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9995097503380286e-05, - "loss": 0.8109, - "step": 987 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999503485748677e-05, - "loss": 0.9123, - "step": 988 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994971813973646e-05, - "loss": 0.9565, - "step": 989 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999490837284342e-05, - "loss": 0.7392, - "step": 990 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999484453409862e-05, - "loss": 0.8824, - "step": 991 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994780297741784e-05, - "loss": 0.7627, - "step": 992 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994715663775463e-05, - "loss": 0.7735, - "step": 993 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994650632202227e-05, - "loss": 0.7247, - "step": 994 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994585203024672e-05, - "loss": 0.6318, - "step": 995 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994519376245393e-05, - "loss": 0.7777, - "step": 996 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994453151867015e-05, - "loss": 0.806, - "step": 997 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994386529892168e-05, - "loss": 0.9578, - "step": 998 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.99943195103235e-05, - "loss": 0.8045, - "step": 999 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994252093163685e-05, - "loss": 0.686, - "step": 1000 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994184278415402e-05, - "loss": 0.8952, - "step": 1001 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994116066081342e-05, - "loss": 0.7683, - "step": 1002 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9994047456164228e-05, - "loss": 0.8268, - "step": 1003 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999397844866678e-05, - "loss": 0.813, - "step": 1004 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993909043591754e-05, - "loss": 0.8608, - "step": 1005 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993839240941905e-05, - "loss": 0.9376, - "step": 1006 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999376904072001e-05, - "loss": 0.8627, - "step": 1007 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993698442928862e-05, - "loss": 0.9004, - "step": 1008 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993627447571273e-05, - "loss": 0.7739, - "step": 1009 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999355605465006e-05, - "loss": 0.7913, - "step": 1010 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999348426416807e-05, - "loss": 0.8241, - "step": 1011 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993412076128154e-05, - "loss": 0.8043, - "step": 1012 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993339490533182e-05, - "loss": 0.842, - "step": 1013 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999326650738605e-05, - "loss": 1.0425, - "step": 1014 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999319312668966e-05, - "loss": 0.9701, - "step": 1015 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993119348446927e-05, - "loss": 0.9604, - "step": 1016 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9993045172660785e-05, - "loss": 0.8758, - "step": 1017 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.999297059933419e-05, - "loss": 0.8114, - "step": 1018 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9992895628470098e-05, - "loss": 0.8814, - "step": 1019 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9992820260071503e-05, - "loss": 0.7533, - "step": 1020 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.99927444941414e-05, - "loss": 0.9043, - "step": 1021 - }, - { - "epoch": 0.18, - "grad_norm": 0.0, - "learning_rate": 1.9992668330682806e-05, - "loss": 0.9112, - "step": 1022 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992591769698745e-05, - "loss": 0.7817, - "step": 1023 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992514811192263e-05, - "loss": 0.8618, - "step": 1024 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992437455166425e-05, - "loss": 0.787, - "step": 1025 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992359701624305e-05, - "loss": 0.8493, - "step": 1026 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992281550569003e-05, - "loss": 0.9832, - "step": 1027 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992203002003622e-05, - "loss": 0.7223, - "step": 1028 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992124055931287e-05, - "loss": 0.7506, - "step": 1029 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9992044712355137e-05, - "loss": 0.8601, - "step": 1030 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999196497127833e-05, - "loss": 0.9816, - "step": 1031 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991884832704044e-05, - "loss": 0.8251, - "step": 1032 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991804296635463e-05, - "loss": 0.7447, - "step": 1033 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991723363075787e-05, - "loss": 0.8595, - "step": 1034 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999164203202824e-05, - "loss": 0.7601, - "step": 1035 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999156030349606e-05, - "loss": 0.8528, - "step": 1036 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999147817748249e-05, - "loss": 0.8629, - "step": 1037 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.99913956539908e-05, - "loss": 0.7622, - "step": 1038 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991312733024277e-05, - "loss": 1.0114, - "step": 1039 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991229414586217e-05, - "loss": 0.7045, - "step": 1040 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991145698679937e-05, - "loss": 0.9153, - "step": 1041 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9991061585308764e-05, - "loss": 0.8985, - "step": 1042 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990977074476045e-05, - "loss": 0.8969, - "step": 1043 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990892166185144e-05, - "loss": 0.7428, - "step": 1044 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990806860439437e-05, - "loss": 0.6246, - "step": 1045 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990721157242318e-05, - "loss": 0.8976, - "step": 1046 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999063505659719e-05, - "loss": 0.7304, - "step": 1047 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990548558507493e-05, - "loss": 0.8354, - "step": 1048 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990461662976656e-05, - "loss": 0.82, - "step": 1049 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990374370008142e-05, - "loss": 0.8958, - "step": 1050 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999028667960542e-05, - "loss": 0.8584, - "step": 1051 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990198591771977e-05, - "loss": 0.8189, - "step": 1052 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.999011010651132e-05, - "loss": 0.8601, - "step": 1053 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9990021223826973e-05, - "loss": 0.752, - "step": 1054 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989931943722464e-05, - "loss": 0.8137, - "step": 1055 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998984226620135e-05, - "loss": 0.7703, - "step": 1056 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989752191267197e-05, - "loss": 0.7958, - "step": 1057 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989661718923587e-05, - "loss": 0.78, - "step": 1058 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998957084917412e-05, - "loss": 0.8294, - "step": 1059 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989479582022415e-05, - "loss": 0.7459, - "step": 1060 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989387917472102e-05, - "loss": 0.7629, - "step": 1061 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998929585552682e-05, - "loss": 0.8753, - "step": 1062 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989203396190235e-05, - "loss": 1.0035, - "step": 1063 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998911053946603e-05, - "loss": 0.8156, - "step": 1064 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9989017285357893e-05, - "loss": 0.9749, - "step": 1065 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988923633869538e-05, - "loss": 0.916, - "step": 1066 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988829585004684e-05, - "loss": 0.9617, - "step": 1067 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988735138767083e-05, - "loss": 0.8068, - "step": 1068 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988640295160485e-05, - "loss": 0.89, - "step": 1069 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988545054188665e-05, - "loss": 0.8224, - "step": 1070 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998844941585541e-05, - "loss": 0.8136, - "step": 1071 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998835338016453e-05, - "loss": 0.8851, - "step": 1072 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988256947119837e-05, - "loss": 0.7602, - "step": 1073 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988160116725172e-05, - "loss": 0.8407, - "step": 1074 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9988062888984387e-05, - "loss": 0.8101, - "step": 1075 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.998796526390135e-05, - "loss": 0.8224, - "step": 1076 - }, - { - "epoch": 0.19, - "grad_norm": 0.0, - "learning_rate": 1.9987867241479947e-05, - "loss": 0.8259, - "step": 1077 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9987768821724075e-05, - "loss": 0.8547, - "step": 1078 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9987670004637647e-05, - "loss": 0.9067, - "step": 1079 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.99875707902246e-05, - "loss": 0.8504, - "step": 1080 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9987471178488872e-05, - "loss": 0.7545, - "step": 1081 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998737116943443e-05, - "loss": 0.8221, - "step": 1082 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998727076306526e-05, - "loss": 0.797, - "step": 1083 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9987169959385345e-05, - "loss": 0.8399, - "step": 1084 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9987068758398704e-05, - "loss": 0.8781, - "step": 1085 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986967160109356e-05, - "loss": 0.7778, - "step": 1086 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986865164521348e-05, - "loss": 0.8408, - "step": 1087 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986762771638734e-05, - "loss": 0.952, - "step": 1088 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986659981465588e-05, - "loss": 0.7904, - "step": 1089 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986556794005997e-05, - "loss": 0.6607, - "step": 1090 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986453209264074e-05, - "loss": 0.8706, - "step": 1091 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986349227243933e-05, - "loss": 0.8951, - "step": 1092 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998624484794971e-05, - "loss": 0.8282, - "step": 1093 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986140071385566e-05, - "loss": 0.7012, - "step": 1094 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9986034897555658e-05, - "loss": 0.9174, - "step": 1095 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9985929326464174e-05, - "loss": 0.7628, - "step": 1096 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9985823358115315e-05, - "loss": 0.8284, - "step": 1097 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9985716992513297e-05, - "loss": 0.7527, - "step": 1098 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998561022966235e-05, - "loss": 0.8691, - "step": 1099 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998550306956672e-05, - "loss": 0.8348, - "step": 1100 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9985395512230676e-05, - "loss": 0.8643, - "step": 1101 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998528755765849e-05, - "loss": 0.9082, - "step": 1102 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998517920585446e-05, - "loss": 0.8803, - "step": 1103 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9985070456822894e-05, - "loss": 0.9296, - "step": 1104 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998496131056812e-05, - "loss": 0.8348, - "step": 1105 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984851767094482e-05, - "loss": 0.9213, - "step": 1106 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984741826406337e-05, - "loss": 0.8035, - "step": 1107 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998463148850805e-05, - "loss": 0.7837, - "step": 1108 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984520753404025e-05, - "loss": 0.869, - "step": 1109 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984409621098658e-05, - "loss": 0.8553, - "step": 1110 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984298091596373e-05, - "loss": 0.8447, - "step": 1111 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984186164901603e-05, - "loss": 0.8126, - "step": 1112 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9984073841018807e-05, - "loss": 0.7941, - "step": 1113 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998396111995245e-05, - "loss": 0.9346, - "step": 1114 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998384800170701e-05, - "loss": 0.7272, - "step": 1115 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983734486287e-05, - "loss": 0.7206, - "step": 1116 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983620573696927e-05, - "loss": 0.9196, - "step": 1117 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983506263941327e-05, - "loss": 0.8486, - "step": 1118 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983391557024737e-05, - "loss": 0.9642, - "step": 1119 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983276452951736e-05, - "loss": 0.8669, - "step": 1120 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983160951726893e-05, - "loss": 0.8288, - "step": 1121 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9983045053354807e-05, - "loss": 0.8258, - "step": 1122 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982928757840084e-05, - "loss": 0.9234, - "step": 1123 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982812065187355e-05, - "loss": 0.7235, - "step": 1124 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998269497540126e-05, - "loss": 0.7542, - "step": 1125 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982577488486457e-05, - "loss": 0.9503, - "step": 1126 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982459604447622e-05, - "loss": 0.8286, - "step": 1127 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998234132328944e-05, - "loss": 0.9052, - "step": 1128 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982222645016623e-05, - "loss": 0.9711, - "step": 1129 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9982103569633887e-05, - "loss": 0.7796, - "step": 1130 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.998198409714597e-05, - "loss": 0.9438, - "step": 1131 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9981864227557628e-05, - "loss": 0.8097, - "step": 1132 - }, - { - "epoch": 0.2, - "grad_norm": 0.0, - "learning_rate": 1.9981743960873626e-05, - "loss": 0.8664, - "step": 1133 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.998162329709875e-05, - "loss": 0.7541, - "step": 1134 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.99815022362378e-05, - "loss": 0.9116, - "step": 1135 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.998138077829559e-05, - "loss": 0.8209, - "step": 1136 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9981258923276954e-05, - "loss": 0.6669, - "step": 1137 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9981136671186738e-05, - "loss": 0.8805, - "step": 1138 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.998101402202981e-05, - "loss": 0.811, - "step": 1139 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980890975811042e-05, - "loss": 0.8309, - "step": 1140 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980767532535336e-05, - "loss": 0.8665, - "step": 1141 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980643692207597e-05, - "loss": 0.9975, - "step": 1142 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980519454832757e-05, - "loss": 0.8044, - "step": 1143 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980394820415753e-05, - "loss": 0.8968, - "step": 1144 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.998026978896155e-05, - "loss": 0.8277, - "step": 1145 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.998014436047511e-05, - "loss": 0.7116, - "step": 1146 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9980018534961436e-05, - "loss": 0.8992, - "step": 1147 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9979892312425527e-05, - "loss": 0.7907, - "step": 1148 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9979765692872404e-05, - "loss": 0.871, - "step": 1149 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9979638676307106e-05, - "loss": 0.8739, - "step": 1150 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9979511262734686e-05, - "loss": 0.7616, - "step": 1151 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997938345216021e-05, - "loss": 0.9901, - "step": 1152 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997925524458877e-05, - "loss": 0.7626, - "step": 1153 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9979126640025455e-05, - "loss": 0.8441, - "step": 1154 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978997638475392e-05, - "loss": 0.7629, - "step": 1155 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978868239943704e-05, - "loss": 0.9652, - "step": 1156 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978738444435544e-05, - "loss": 0.8058, - "step": 1157 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978608251956073e-05, - "loss": 0.7908, - "step": 1158 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978477662510474e-05, - "loss": 0.7789, - "step": 1159 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997834667610394e-05, - "loss": 1.0641, - "step": 1160 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978215292741677e-05, - "loss": 0.8623, - "step": 1161 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9978083512428917e-05, - "loss": 0.8856, - "step": 1162 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977951335170903e-05, - "loss": 0.6593, - "step": 1163 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977818760972895e-05, - "loss": 0.8487, - "step": 1164 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997768578984016e-05, - "loss": 0.7861, - "step": 1165 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997755242177799e-05, - "loss": 0.8677, - "step": 1166 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977418656791697e-05, - "loss": 0.7665, - "step": 1167 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977284494886594e-05, - "loss": 0.8675, - "step": 1168 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977149936068022e-05, - "loss": 0.9427, - "step": 1169 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9977014980341334e-05, - "loss": 0.9231, - "step": 1170 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.99768796277119e-05, - "loss": 0.8815, - "step": 1171 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9976743878185104e-05, - "loss": 0.8793, - "step": 1172 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9976607731766343e-05, - "loss": 0.9092, - "step": 1173 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9976471188461034e-05, - "loss": 0.8234, - "step": 1174 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9976334248274613e-05, - "loss": 0.8399, - "step": 1175 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997619691121252e-05, - "loss": 0.9267, - "step": 1176 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9976059177280233e-05, - "loss": 1.0094, - "step": 1177 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975921046483217e-05, - "loss": 0.9742, - "step": 1178 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975782518826972e-05, - "loss": 0.8716, - "step": 1179 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975643594317008e-05, - "loss": 1.0713, - "step": 1180 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975504272958854e-05, - "loss": 0.7195, - "step": 1181 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997536455475805e-05, - "loss": 0.7582, - "step": 1182 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975224439720157e-05, - "loss": 0.7504, - "step": 1183 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9975083927850747e-05, - "loss": 0.818, - "step": 1184 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9974943019155408e-05, - "loss": 0.7303, - "step": 1185 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9974801713639752e-05, - "loss": 0.9618, - "step": 1186 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.9974660011309392e-05, - "loss": 0.7737, - "step": 1187 - }, - { - "epoch": 0.21, - "grad_norm": 0.0, - "learning_rate": 1.997451791216997e-05, - "loss": 0.8, - "step": 1188 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997437541622714e-05, - "loss": 0.8264, - "step": 1189 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997423252348657e-05, - "loss": 0.7963, - "step": 1190 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9974089233953938e-05, - "loss": 0.7237, - "step": 1191 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997394554763495e-05, - "loss": 0.8616, - "step": 1192 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9973801464535326e-05, - "loss": 0.9371, - "step": 1193 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9973656984660793e-05, - "loss": 0.7771, - "step": 1194 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9973512108017098e-05, - "loss": 0.8082, - "step": 1195 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9973366834610008e-05, - "loss": 0.8651, - "step": 1196 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9973221164445295e-05, - "loss": 0.7875, - "step": 1197 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997307509752876e-05, - "loss": 0.9222, - "step": 1198 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972928633866216e-05, - "loss": 0.7356, - "step": 1199 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972781773463488e-05, - "loss": 0.8621, - "step": 1200 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972634516326415e-05, - "loss": 0.8622, - "step": 1201 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972486862460853e-05, - "loss": 0.7084, - "step": 1202 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972338811872683e-05, - "loss": 0.8648, - "step": 1203 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972190364567793e-05, - "loss": 0.8922, - "step": 1204 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9972041520552087e-05, - "loss": 0.9239, - "step": 1205 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9971892279831482e-05, - "loss": 0.8219, - "step": 1206 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997174264241192e-05, - "loss": 0.9487, - "step": 1207 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9971592608299356e-05, - "loss": 0.7828, - "step": 1208 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9971442177499757e-05, - "loss": 0.6973, - "step": 1209 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.99712913500191e-05, - "loss": 0.9067, - "step": 1210 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9971140125863396e-05, - "loss": 0.9242, - "step": 1211 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9970988505038654e-05, - "loss": 0.8089, - "step": 1212 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997083648755091e-05, - "loss": 0.8027, - "step": 1213 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997068407340621e-05, - "loss": 0.9414, - "step": 1214 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9970531262610618e-05, - "loss": 0.8314, - "step": 1215 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.997037805517021e-05, - "loss": 0.9546, - "step": 1216 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9970224451091082e-05, - "loss": 0.9148, - "step": 1217 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9970070450379343e-05, - "loss": 0.7995, - "step": 1218 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9969916053041126e-05, - "loss": 0.704, - "step": 1219 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996976125908257e-05, - "loss": 0.8701, - "step": 1220 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996960606850983e-05, - "loss": 0.9239, - "step": 1221 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9969450481329077e-05, - "loss": 0.8079, - "step": 1222 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9969294497546512e-05, - "loss": 0.8451, - "step": 1223 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9969138117168332e-05, - "loss": 0.9581, - "step": 1224 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996898134020076e-05, - "loss": 0.8377, - "step": 1225 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996882416665003e-05, - "loss": 0.9527, - "step": 1226 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.99686665965224e-05, - "loss": 0.8748, - "step": 1227 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996850862982413e-05, - "loss": 0.8004, - "step": 1228 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9968350266561516e-05, - "loss": 0.9719, - "step": 1229 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9968191506740847e-05, - "loss": 0.6787, - "step": 1230 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9968032350368447e-05, - "loss": 0.707, - "step": 1231 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9967872797450643e-05, - "loss": 0.7448, - "step": 1232 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9967712847993782e-05, - "loss": 0.8139, - "step": 1233 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996755250200423e-05, - "loss": 0.8812, - "step": 1234 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9967391759488365e-05, - "loss": 0.7707, - "step": 1235 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996723062045258e-05, - "loss": 0.7967, - "step": 1236 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9967069084903282e-05, - "loss": 0.839, - "step": 1237 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9966907152846905e-05, - "loss": 0.9095, - "step": 1238 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9966744824289886e-05, - "loss": 0.8374, - "step": 1239 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9966582099238682e-05, - "loss": 0.7712, - "step": 1240 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996641897769977e-05, - "loss": 0.8375, - "step": 1241 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.996625545967964e-05, - "loss": 0.8494, - "step": 1242 - }, - { - "epoch": 0.22, - "grad_norm": 0.0, - "learning_rate": 1.9966091545184795e-05, - "loss": 0.8361, - "step": 1243 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9965927234221752e-05, - "loss": 0.7107, - "step": 1244 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996576252679705e-05, - "loss": 0.8379, - "step": 1245 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9965597422917248e-05, - "loss": 0.8929, - "step": 1246 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9965431922588905e-05, - "loss": 0.8493, - "step": 1247 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9965266025818608e-05, - "loss": 0.7986, - "step": 1248 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996509973261296e-05, - "loss": 0.7255, - "step": 1249 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996493304297857e-05, - "loss": 0.9056, - "step": 1250 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9964765956922075e-05, - "loss": 0.76, - "step": 1251 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996459847445012e-05, - "loss": 0.8466, - "step": 1252 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996443059556936e-05, - "loss": 0.6876, - "step": 1253 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996426232028649e-05, - "loss": 0.8441, - "step": 1254 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9964093648608193e-05, - "loss": 0.9379, - "step": 1255 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9963924580541182e-05, - "loss": 0.8493, - "step": 1256 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9963755116092178e-05, - "loss": 0.9497, - "step": 1257 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9963585255267926e-05, - "loss": 0.8512, - "step": 1258 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996341499807519e-05, - "loss": 0.8966, - "step": 1259 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9963244344520737e-05, - "loss": 0.8336, - "step": 1260 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996307329461135e-05, - "loss": 0.7012, - "step": 1261 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962901848353842e-05, - "loss": 0.8109, - "step": 1262 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962730005755033e-05, - "loss": 0.7784, - "step": 1263 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962557766821756e-05, - "loss": 0.9072, - "step": 1264 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962385131560865e-05, - "loss": 0.8263, - "step": 1265 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962212099979225e-05, - "loss": 0.7939, - "step": 1266 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9962038672083723e-05, - "loss": 0.7303, - "step": 1267 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9961864847881255e-05, - "loss": 0.8386, - "step": 1268 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996169062737874e-05, - "loss": 0.8192, - "step": 1269 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9961516010583106e-05, - "loss": 1.0556, - "step": 1270 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.99613409975013e-05, - "loss": 0.8248, - "step": 1271 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9961165588140284e-05, - "loss": 0.8038, - "step": 1272 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9960989782507035e-05, - "loss": 0.7862, - "step": 1273 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9960813580608552e-05, - "loss": 0.8922, - "step": 1274 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.996063698245184e-05, - "loss": 0.7555, - "step": 1275 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9960459988043924e-05, - "loss": 0.753, - "step": 1276 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9960282597391843e-05, - "loss": 0.6911, - "step": 1277 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9960104810502662e-05, - "loss": 0.7747, - "step": 1278 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995992662738345e-05, - "loss": 0.7997, - "step": 1279 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9959748048041292e-05, - "loss": 0.9492, - "step": 1280 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9959569072483296e-05, - "loss": 0.7736, - "step": 1281 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9959389700716578e-05, - "loss": 0.8501, - "step": 1282 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995920993274828e-05, - "loss": 0.8581, - "step": 1283 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9959029768585546e-05, - "loss": 0.7876, - "step": 1284 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995884920823555e-05, - "loss": 0.7976, - "step": 1285 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995866825170547e-05, - "loss": 0.7855, - "step": 1286 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995848689900251e-05, - "loss": 0.7209, - "step": 1287 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9958305150133878e-05, - "loss": 0.7672, - "step": 1288 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995812300510681e-05, - "loss": 0.7953, - "step": 1289 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9957940463928548e-05, - "loss": 0.7867, - "step": 1290 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9957757526606353e-05, - "loss": 0.8037, - "step": 1291 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.995757419314751e-05, - "loss": 0.9281, - "step": 1292 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.99573904635593e-05, - "loss": 0.9066, - "step": 1293 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9957206337849048e-05, - "loss": 0.8942, - "step": 1294 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9957021816024067e-05, - "loss": 0.834, - "step": 1295 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.99568368980917e-05, - "loss": 0.8465, - "step": 1296 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9956651584059305e-05, - "loss": 1.0352, - "step": 1297 - }, - { - "epoch": 0.23, - "grad_norm": 0.0, - "learning_rate": 1.9956465873934255e-05, - "loss": 0.991, - "step": 1298 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995627976772393e-05, - "loss": 0.9087, - "step": 1299 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995609326543575e-05, - "loss": 0.8801, - "step": 1300 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9955906367077116e-05, - "loss": 0.8111, - "step": 1301 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9955719072655472e-05, - "loss": 0.8657, - "step": 1302 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995553138217827e-05, - "loss": 0.8363, - "step": 1303 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9955343295652977e-05, - "loss": 0.8641, - "step": 1304 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995515481308707e-05, - "loss": 0.7997, - "step": 1305 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995496593448805e-05, - "loss": 0.9231, - "step": 1306 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995477665986343e-05, - "loss": 0.8235, - "step": 1307 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9954586989220748e-05, - "loss": 0.6905, - "step": 1308 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9954396922567536e-05, - "loss": 0.8035, - "step": 1309 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9954206459911364e-05, - "loss": 0.7698, - "step": 1310 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9954015601259805e-05, - "loss": 0.9817, - "step": 1311 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9953824346620455e-05, - "loss": 0.7698, - "step": 1312 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995363269600092e-05, - "loss": 0.837, - "step": 1313 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9953440649408824e-05, - "loss": 0.8392, - "step": 1314 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995324820685181e-05, - "loss": 0.8605, - "step": 1315 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995305536833753e-05, - "loss": 0.9322, - "step": 1316 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9952862133873655e-05, - "loss": 0.8277, - "step": 1317 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995266850346788e-05, - "loss": 0.7663, - "step": 1318 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9952474477127897e-05, - "loss": 0.7619, - "step": 1319 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9952280054861427e-05, - "loss": 0.8348, - "step": 1320 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9952085236676212e-05, - "loss": 0.727, - "step": 1321 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9951890022579998e-05, - "loss": 0.8149, - "step": 1322 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9951694412580547e-05, - "loss": 0.8964, - "step": 1323 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995149840668565e-05, - "loss": 0.7349, - "step": 1324 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9951302004903093e-05, - "loss": 0.6962, - "step": 1325 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9951105207240695e-05, - "loss": 0.7526, - "step": 1326 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9950908013706285e-05, - "loss": 0.7476, - "step": 1327 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995071042430771e-05, - "loss": 0.7857, - "step": 1328 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9950512439052824e-05, - "loss": 0.8139, - "step": 1329 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.995031405794951e-05, - "loss": 0.7671, - "step": 1330 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9950115281005655e-05, - "loss": 0.8633, - "step": 1331 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994991610822917e-05, - "loss": 0.7905, - "step": 1332 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994971653962798e-05, - "loss": 0.9422, - "step": 1333 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9949516575210015e-05, - "loss": 0.7448, - "step": 1334 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994931621498324e-05, - "loss": 0.8447, - "step": 1335 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994911545895562e-05, - "loss": 1.0357, - "step": 1336 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9948914307135142e-05, - "loss": 0.7847, - "step": 1337 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9948712759529813e-05, - "loss": 0.8666, - "step": 1338 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9948510816147647e-05, - "loss": 0.8123, - "step": 1339 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994830847699668e-05, - "loss": 0.8102, - "step": 1340 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9948105742084952e-05, - "loss": 0.8861, - "step": 1341 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994790261142054e-05, - "loss": 0.8657, - "step": 1342 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994769908501152e-05, - "loss": 0.7715, - "step": 1343 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9947495162865992e-05, - "loss": 0.857, - "step": 1344 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9947290844992063e-05, - "loss": 0.9256, - "step": 1345 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9947086131397862e-05, - "loss": 0.7546, - "step": 1346 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9946881022091536e-05, - "loss": 0.9849, - "step": 1347 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9946675517081242e-05, - "loss": 0.7986, - "step": 1348 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9946469616375157e-05, - "loss": 0.8343, - "step": 1349 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9946263319981473e-05, - "loss": 0.9134, - "step": 1350 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9946056627908392e-05, - "loss": 0.7413, - "step": 1351 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.994584954016414e-05, - "loss": 0.7996, - "step": 1352 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9945642056756956e-05, - "loss": 0.9309, - "step": 1353 - }, - { - "epoch": 0.24, - "grad_norm": 0.0, - "learning_rate": 1.9945434177695095e-05, - "loss": 0.7928, - "step": 1354 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9945225902986824e-05, - "loss": 0.9828, - "step": 1355 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9945017232640428e-05, - "loss": 0.8124, - "step": 1356 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9944808166664213e-05, - "loss": 0.7661, - "step": 1357 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994459870506649e-05, - "loss": 0.9713, - "step": 1358 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9944388847855595e-05, - "loss": 0.796, - "step": 1359 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9944178595039877e-05, - "loss": 0.8893, - "step": 1360 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9943967946627695e-05, - "loss": 0.8381, - "step": 1361 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9943756902627436e-05, - "loss": 0.8142, - "step": 1362 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9943545463047494e-05, - "loss": 0.8833, - "step": 1363 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994333362789628e-05, - "loss": 0.7633, - "step": 1364 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994312139718222e-05, - "loss": 0.8622, - "step": 1365 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9942908770913756e-05, - "loss": 0.8474, - "step": 1366 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9942695749099344e-05, - "loss": 0.8945, - "step": 1367 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994248233174747e-05, - "loss": 0.8459, - "step": 1368 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9942268518866613e-05, - "loss": 0.8135, - "step": 1369 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994205431046528e-05, - "loss": 0.8222, - "step": 1370 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9941839706551997e-05, - "loss": 0.7784, - "step": 1371 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.99416247071353e-05, - "loss": 0.7259, - "step": 1372 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9941409312223737e-05, - "loss": 0.8844, - "step": 1373 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9941193521825885e-05, - "loss": 0.7871, - "step": 1374 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9940977335950324e-05, - "loss": 0.9137, - "step": 1375 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.994076075460566e-05, - "loss": 0.8029, - "step": 1376 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9940543777800493e-05, - "loss": 0.7624, - "step": 1377 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9940326405543472e-05, - "loss": 0.9847, - "step": 1378 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9940108637843237e-05, - "loss": 0.8446, - "step": 1379 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993989047470845e-05, - "loss": 0.8441, - "step": 1380 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9939671916147792e-05, - "loss": 0.8569, - "step": 1381 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993945296216996e-05, - "loss": 0.8656, - "step": 1382 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9939233612783662e-05, - "loss": 0.9497, - "step": 1383 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993901386799762e-05, - "loss": 0.8808, - "step": 1384 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9938793727820585e-05, - "loss": 0.8656, - "step": 1385 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993857319226131e-05, - "loss": 0.9452, - "step": 1386 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993835226132856e-05, - "loss": 0.843, - "step": 1387 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9938130935031136e-05, - "loss": 0.8814, - "step": 1388 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993790921337784e-05, - "loss": 0.8159, - "step": 1389 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993768709637749e-05, - "loss": 0.8694, - "step": 1390 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9937464584038926e-05, - "loss": 0.7978, - "step": 1391 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9937241676370992e-05, - "loss": 0.8869, - "step": 1392 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9937018373382564e-05, - "loss": 0.8071, - "step": 1393 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993679467508252e-05, - "loss": 0.7616, - "step": 1394 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9936570581479763e-05, - "loss": 0.8917, - "step": 1395 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9936346092583205e-05, - "loss": 0.8671, - "step": 1396 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993612120840178e-05, - "loss": 0.7315, - "step": 1397 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9935895928944428e-05, - "loss": 0.9237, - "step": 1398 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9935670254220115e-05, - "loss": 0.8986, - "step": 1399 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9935444184237817e-05, - "loss": 0.8871, - "step": 1400 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993521771900653e-05, - "loss": 0.8298, - "step": 1401 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9934990858535266e-05, - "loss": 0.9181, - "step": 1402 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9934763602833043e-05, - "loss": 0.7675, - "step": 1403 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9934535951908907e-05, - "loss": 0.8342, - "step": 1404 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.993430790577191e-05, - "loss": 0.8009, - "step": 1405 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9934079464431128e-05, - "loss": 0.8019, - "step": 1406 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9933850627895643e-05, - "loss": 0.7499, - "step": 1407 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9933621396174568e-05, - "loss": 0.976, - "step": 1408 - }, - { - "epoch": 0.25, - "grad_norm": 0.0, - "learning_rate": 1.9933391769277013e-05, - "loss": 0.9115, - "step": 1409 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9933161747212114e-05, - "loss": 0.781, - "step": 1410 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.993293132998903e-05, - "loss": 0.8573, - "step": 1411 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9932700517616918e-05, - "loss": 0.8434, - "step": 1412 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9932469310104963e-05, - "loss": 0.9045, - "step": 1413 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9932237707462368e-05, - "loss": 0.9033, - "step": 1414 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9932005709698337e-05, - "loss": 0.799, - "step": 1415 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.993177331682211e-05, - "loss": 0.808, - "step": 1416 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9931540528842923e-05, - "loss": 0.8017, - "step": 1417 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.993130734577004e-05, - "loss": 0.7661, - "step": 1418 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9931073767612735e-05, - "loss": 0.9441, - "step": 1419 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9930839794380308e-05, - "loss": 0.8576, - "step": 1420 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.993060542608206e-05, - "loss": 0.9336, - "step": 1421 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9930370662727315e-05, - "loss": 0.8251, - "step": 1422 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9930135504325412e-05, - "loss": 0.8771, - "step": 1423 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992989995088571e-05, - "loss": 0.7416, - "step": 1424 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9929664002417576e-05, - "loss": 0.7379, - "step": 1425 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9929427658930396e-05, - "loss": 0.8175, - "step": 1426 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9929190920433578e-05, - "loss": 0.9571, - "step": 1427 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992895378693653e-05, - "loss": 0.8127, - "step": 1428 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9928716258448693e-05, - "loss": 0.8833, - "step": 1429 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9928478334979515e-05, - "loss": 0.7196, - "step": 1430 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9928240016538463e-05, - "loss": 0.9046, - "step": 1431 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9928001303135012e-05, - "loss": 0.8563, - "step": 1432 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992776219477866e-05, - "loss": 0.8979, - "step": 1433 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9927522691478923e-05, - "loss": 0.7847, - "step": 1434 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9927282793245327e-05, - "loss": 0.8835, - "step": 1435 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9927042500087418e-05, - "loss": 0.8489, - "step": 1436 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9926801812014748e-05, - "loss": 0.7356, - "step": 1437 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9926560729036898e-05, - "loss": 0.9042, - "step": 1438 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9926319251163454e-05, - "loss": 0.856, - "step": 1439 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9926077378404027e-05, - "loss": 0.882, - "step": 1440 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992583511076824e-05, - "loss": 0.9674, - "step": 1441 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992559244826573e-05, - "loss": 0.8396, - "step": 1442 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9925349390906143e-05, - "loss": 0.8818, - "step": 1443 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9925105938699155e-05, - "loss": 0.8519, - "step": 1444 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9924862091654453e-05, - "loss": 0.8482, - "step": 1445 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9924617849781734e-05, - "loss": 0.7658, - "step": 1446 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9924373213090716e-05, - "loss": 0.7599, - "step": 1447 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992412818159113e-05, - "loss": 0.8009, - "step": 1448 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992388275529272e-05, - "loss": 0.709, - "step": 1449 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9923636934205257e-05, - "loss": 0.8811, - "step": 1450 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9923390718338518e-05, - "loss": 0.8586, - "step": 1451 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9923144107702295e-05, - "loss": 0.9261, - "step": 1452 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9922897102306398e-05, - "loss": 0.8893, - "step": 1453 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992264970216066e-05, - "loss": 0.9006, - "step": 1454 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9922401907274913e-05, - "loss": 0.7246, - "step": 1455 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9922153717659024e-05, - "loss": 0.9085, - "step": 1456 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992190513332286e-05, - "loss": 0.7628, - "step": 1457 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9921656154276313e-05, - "loss": 0.7527, - "step": 1458 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992140678052929e-05, - "loss": 0.737, - "step": 1459 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9921157012091704e-05, - "loss": 0.8374, - "step": 1460 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9920906848973504e-05, - "loss": 0.7521, - "step": 1461 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9920656291184625e-05, - "loss": 0.8292, - "step": 1462 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.992040533873505e-05, - "loss": 0.7348, - "step": 1463 - }, - { - "epoch": 0.26, - "grad_norm": 0.0, - "learning_rate": 1.9920153991634757e-05, - "loss": 0.8025, - "step": 1464 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9919902249893738e-05, - "loss": 0.7579, - "step": 1465 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9919650113522017e-05, - "loss": 0.8099, - "step": 1466 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9919397582529623e-05, - "loss": 0.8236, - "step": 1467 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9919144656926598e-05, - "loss": 0.7808, - "step": 1468 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9918891336723005e-05, - "loss": 0.7394, - "step": 1469 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9918637621928924e-05, - "loss": 0.6834, - "step": 1470 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991838351255445e-05, - "loss": 0.8807, - "step": 1471 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9918129008609684e-05, - "loss": 0.8914, - "step": 1472 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991787411010476e-05, - "loss": 0.885, - "step": 1473 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991761881704981e-05, - "loss": 0.8751, - "step": 1474 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9917363129454995e-05, - "loss": 0.8198, - "step": 1475 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9917107047330485e-05, - "loss": 0.6958, - "step": 1476 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9916850570686465e-05, - "loss": 0.6718, - "step": 1477 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9916593699533143e-05, - "loss": 0.8016, - "step": 1478 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9916336433880738e-05, - "loss": 0.8946, - "step": 1479 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9916078773739478e-05, - "loss": 0.9082, - "step": 1480 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991582071911962e-05, - "loss": 0.7826, - "step": 1481 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9915562270031426e-05, - "loss": 0.7868, - "step": 1482 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991530342648518e-05, - "loss": 0.8589, - "step": 1483 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9915044188491175e-05, - "loss": 0.9583, - "step": 1484 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991478455605973e-05, - "loss": 1.076, - "step": 1485 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9914524529201167e-05, - "loss": 1.056, - "step": 1486 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9914264107925833e-05, - "loss": 0.8015, - "step": 1487 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9914003292244094e-05, - "loss": 0.8226, - "step": 1488 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9913742082166316e-05, - "loss": 0.6809, - "step": 1489 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9913480477702896e-05, - "loss": 0.8996, - "step": 1490 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991321847886424e-05, - "loss": 0.9764, - "step": 1491 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9912956085660774e-05, - "loss": 0.8661, - "step": 1492 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9912693298102926e-05, - "loss": 0.755, - "step": 1493 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9912430116201165e-05, - "loss": 0.8833, - "step": 1494 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9912166539965946e-05, - "loss": 0.917, - "step": 1495 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9911902569407765e-05, - "loss": 0.7176, - "step": 1496 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991163820453712e-05, - "loss": 0.9629, - "step": 1497 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9911373445364526e-05, - "loss": 0.7848, - "step": 1498 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991110829190052e-05, - "loss": 0.8939, - "step": 1499 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9910842744155647e-05, - "loss": 0.7156, - "step": 1500 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991057680214047e-05, - "loss": 0.8195, - "step": 1501 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991031046586557e-05, - "loss": 0.7022, - "step": 1502 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.991004373534154e-05, - "loss": 0.8488, - "step": 1503 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9909776610578995e-05, - "loss": 0.7894, - "step": 1504 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9909509091588562e-05, - "loss": 0.8316, - "step": 1505 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990924117838088e-05, - "loss": 0.8731, - "step": 1506 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990897287096661e-05, - "loss": 0.7871, - "step": 1507 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9908704169356424e-05, - "loss": 0.7111, - "step": 1508 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9908435073561013e-05, - "loss": 0.9042, - "step": 1509 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990816558359108e-05, - "loss": 0.7057, - "step": 1510 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9907895699457344e-05, - "loss": 0.9059, - "step": 1511 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9907625421170542e-05, - "loss": 0.752, - "step": 1512 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990735474874143e-05, - "loss": 0.8787, - "step": 1513 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9907083682180777e-05, - "loss": 0.8948, - "step": 1514 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990681222149936e-05, - "loss": 0.9631, - "step": 1515 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9906540366707982e-05, - "loss": 0.7973, - "step": 1516 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.9906268117817457e-05, - "loss": 0.9129, - "step": 1517 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990599547483862e-05, - "loss": 0.7535, - "step": 1518 - }, - { - "epoch": 0.27, - "grad_norm": 0.0, - "learning_rate": 1.990572243778231e-05, - "loss": 0.8544, - "step": 1519 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9905449006659388e-05, - "loss": 0.9656, - "step": 1520 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990517518148074e-05, - "loss": 0.8308, - "step": 1521 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9904900962257255e-05, - "loss": 0.8725, - "step": 1522 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990462634899984e-05, - "loss": 0.7592, - "step": 1523 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9904351341719425e-05, - "loss": 0.7024, - "step": 1524 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9904075940426942e-05, - "loss": 0.8573, - "step": 1525 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9903800145133353e-05, - "loss": 0.7499, - "step": 1526 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990352395584963e-05, - "loss": 0.7245, - "step": 1527 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9903247372586752e-05, - "loss": 0.6864, - "step": 1528 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9902970395355735e-05, - "loss": 0.8253, - "step": 1529 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990269302416759e-05, - "loss": 0.878, - "step": 1530 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990241525903335e-05, - "loss": 0.8728, - "step": 1531 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990213709996407e-05, - "loss": 0.7183, - "step": 1532 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990185854697081e-05, - "loss": 0.7644, - "step": 1533 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9901579600064654e-05, - "loss": 0.8992, - "step": 1534 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.99013002592567e-05, - "loss": 0.8924, - "step": 1535 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.990102052455806e-05, - "loss": 0.8555, - "step": 1536 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9900740395979863e-05, - "loss": 0.7192, - "step": 1537 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9900459873533252e-05, - "loss": 0.736, - "step": 1538 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9900178957229385e-05, - "loss": 0.8525, - "step": 1539 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9899897647079444e-05, - "loss": 0.8905, - "step": 1540 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989961594309461e-05, - "loss": 0.9012, - "step": 1541 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.98993338452861e-05, - "loss": 0.8589, - "step": 1542 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9899051353665126e-05, - "loss": 0.756, - "step": 1543 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989876846824293e-05, - "loss": 0.9557, - "step": 1544 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989848518903077e-05, - "loss": 0.7402, - "step": 1545 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9898201516039913e-05, - "loss": 0.8877, - "step": 1546 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989791744928164e-05, - "loss": 0.9002, - "step": 1547 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9897632988767257e-05, - "loss": 0.7359, - "step": 1548 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9897348134508078e-05, - "loss": 0.7785, - "step": 1549 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9897062886515436e-05, - "loss": 0.7963, - "step": 1550 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9896777244800676e-05, - "loss": 0.8738, - "step": 1551 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989649120937516e-05, - "loss": 0.8571, - "step": 1552 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9896204780250276e-05, - "loss": 0.8072, - "step": 1553 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989591795743741e-05, - "loss": 0.8235, - "step": 1554 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9895630740947975e-05, - "loss": 0.7535, - "step": 1555 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.98953431307934e-05, - "loss": 0.8612, - "step": 1556 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9895055126985122e-05, - "loss": 0.8212, - "step": 1557 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9894766729534595e-05, - "loss": 0.8414, - "step": 1558 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9894477938453302e-05, - "loss": 0.8741, - "step": 1559 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9894188753752726e-05, - "loss": 0.7514, - "step": 1560 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9893899175444372e-05, - "loss": 0.8018, - "step": 1561 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9893609203539757e-05, - "loss": 0.8045, - "step": 1562 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989331883805042e-05, - "loss": 0.8746, - "step": 1563 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9893028078987914e-05, - "loss": 0.8458, - "step": 1564 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.98927369263638e-05, - "loss": 0.895, - "step": 1565 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9892445380189664e-05, - "loss": 0.7544, - "step": 1566 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9892153440477106e-05, - "loss": 0.8017, - "step": 1567 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9891861107237737e-05, - "loss": 0.7556, - "step": 1568 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.989156838048319e-05, - "loss": 0.8748, - "step": 1569 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9891275260225105e-05, - "loss": 0.9546, - "step": 1570 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9890981746475145e-05, - "loss": 0.7106, - "step": 1571 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9890687839244988e-05, - "loss": 0.9252, - "step": 1572 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9890393538546325e-05, - "loss": 0.9933, - "step": 1573 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9890098844390864e-05, - "loss": 0.9575, - "step": 1574 - }, - { - "epoch": 0.28, - "grad_norm": 0.0, - "learning_rate": 1.9889803756790327e-05, - "loss": 0.6209, - "step": 1575 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9889508275756457e-05, - "loss": 0.7607, - "step": 1576 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9889212401301004e-05, - "loss": 0.7092, - "step": 1577 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9888916133435742e-05, - "loss": 0.8992, - "step": 1578 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.988861947217245e-05, - "loss": 0.901, - "step": 1579 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9888322417522944e-05, - "loss": 0.8929, - "step": 1580 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9888024969499023e-05, - "loss": 0.7412, - "step": 1581 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9887727128112537e-05, - "loss": 0.8417, - "step": 1582 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9887428893375325e-05, - "loss": 0.8853, - "step": 1583 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9887130265299254e-05, - "loss": 0.8979, - "step": 1584 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9886831243896204e-05, - "loss": 0.899, - "step": 1585 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9886531829178067e-05, - "loss": 0.8417, - "step": 1586 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9886232021156762e-05, - "loss": 0.7752, - "step": 1587 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.988593181984421e-05, - "loss": 0.8352, - "step": 1588 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9885631225252352e-05, - "loss": 0.7745, - "step": 1589 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.988533023739315e-05, - "loss": 0.8884, - "step": 1590 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9885028856278573e-05, - "loss": 0.8559, - "step": 1591 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9884727081920622e-05, - "loss": 1.0028, - "step": 1592 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9884424914331288e-05, - "loss": 0.8886, - "step": 1593 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.98841223535226e-05, - "loss": 0.7468, - "step": 1594 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9883819399506592e-05, - "loss": 0.7901, - "step": 1595 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9883516052295317e-05, - "loss": 0.9135, - "step": 1596 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.988321231190084e-05, - "loss": 0.8754, - "step": 1597 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9882908178335253e-05, - "loss": 0.8901, - "step": 1598 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9882603651610642e-05, - "loss": 0.7703, - "step": 1599 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9882298731739128e-05, - "loss": 0.7685, - "step": 1600 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9881993418732843e-05, - "loss": 0.7836, - "step": 1601 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.988168771260393e-05, - "loss": 0.98, - "step": 1602 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9881381613364554e-05, - "loss": 0.8309, - "step": 1603 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9881075121026886e-05, - "loss": 0.9761, - "step": 1604 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9880768235603126e-05, - "loss": 0.7733, - "step": 1605 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9880460957105473e-05, - "loss": 0.8753, - "step": 1606 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9880153285546165e-05, - "loss": 0.7686, - "step": 1607 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9879845220937427e-05, - "loss": 0.8104, - "step": 1608 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9879536763291526e-05, - "loss": 0.8456, - "step": 1609 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9879227912620722e-05, - "loss": 0.7786, - "step": 1610 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9878918668937314e-05, - "loss": 0.7754, - "step": 1611 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9878609032253596e-05, - "loss": 0.8154, - "step": 1612 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9878299002581885e-05, - "loss": 0.8579, - "step": 1613 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9877988579934516e-05, - "loss": 0.9338, - "step": 1614 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9877677764323847e-05, - "loss": 0.8696, - "step": 1615 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9877366555762228e-05, - "loss": 0.8679, - "step": 1616 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.987705495426205e-05, - "loss": 0.7406, - "step": 1617 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9876742959835703e-05, - "loss": 0.7837, - "step": 1618 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9876430572495604e-05, - "loss": 0.8128, - "step": 1619 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9876117792254175e-05, - "loss": 0.8426, - "step": 1620 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9875804619123864e-05, - "loss": 0.9521, - "step": 1621 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9875491053117123e-05, - "loss": 0.7284, - "step": 1622 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9875177094246435e-05, - "loss": 0.8163, - "step": 1623 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9874862742524282e-05, - "loss": 0.7661, - "step": 1624 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9874547997963176e-05, - "loss": 0.8237, - "step": 1625 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9874232860575633e-05, - "loss": 0.7686, - "step": 1626 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.987391733037419e-05, - "loss": 0.7417, - "step": 1627 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.98736014073714e-05, - "loss": 0.7982, - "step": 1628 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9873285091579834e-05, - "loss": 0.7278, - "step": 1629 - }, - { - "epoch": 0.29, - "grad_norm": 0.0, - "learning_rate": 1.9872968383012073e-05, - "loss": 0.8316, - "step": 1630 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.987265128168071e-05, - "loss": 0.8161, - "step": 1631 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9872333787598376e-05, - "loss": 0.8233, - "step": 1632 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.987201590077769e-05, - "loss": 0.8653, - "step": 1633 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9871697621231294e-05, - "loss": 0.9123, - "step": 1634 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.987137894897186e-05, - "loss": 0.8633, - "step": 1635 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.987105988401206e-05, - "loss": 0.809, - "step": 1636 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9870740426364587e-05, - "loss": 0.7964, - "step": 1637 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.987042057604215e-05, - "loss": 0.7882, - "step": 1638 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9870100333057473e-05, - "loss": 0.74, - "step": 1639 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9869779697423297e-05, - "loss": 0.8264, - "step": 1640 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9869458669152378e-05, - "loss": 1.0154, - "step": 1641 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9869137248257483e-05, - "loss": 0.8391, - "step": 1642 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9868815434751406e-05, - "loss": 0.8167, - "step": 1643 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986849322864694e-05, - "loss": 0.7562, - "step": 1644 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986817062995691e-05, - "loss": 0.7469, - "step": 1645 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986784763869415e-05, - "loss": 0.8518, - "step": 1646 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.98675242548715e-05, - "loss": 0.7667, - "step": 1647 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9867200478501836e-05, - "loss": 0.6352, - "step": 1648 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9866876309598032e-05, - "loss": 0.8483, - "step": 1649 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9866551748172987e-05, - "loss": 0.927, - "step": 1650 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986622679423961e-05, - "loss": 0.6783, - "step": 1651 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986590144781083e-05, - "loss": 0.7888, - "step": 1652 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9865575708899587e-05, - "loss": 0.7246, - "step": 1653 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9865249577518842e-05, - "loss": 0.8947, - "step": 1654 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986492305368157e-05, - "loss": 0.8373, - "step": 1655 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9864596137400757e-05, - "loss": 0.8122, - "step": 1656 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9864268828689413e-05, - "loss": 0.9212, - "step": 1657 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9863941127560557e-05, - "loss": 0.7686, - "step": 1658 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9863613034027224e-05, - "loss": 0.877, - "step": 1659 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9863284548102467e-05, - "loss": 0.7452, - "step": 1660 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9862955669799356e-05, - "loss": 0.8449, - "step": 1661 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986262639913097e-05, - "loss": 0.8607, - "step": 1662 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9862296736110412e-05, - "loss": 0.7391, - "step": 1663 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9861966680750796e-05, - "loss": 0.9042, - "step": 1664 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9861636233065247e-05, - "loss": 0.9898, - "step": 1665 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9861305393066916e-05, - "loss": 0.8157, - "step": 1666 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986097416076896e-05, - "loss": 0.7996, - "step": 1667 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9860642536184564e-05, - "loss": 0.8912, - "step": 1668 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.986031051932692e-05, - "loss": 0.8146, - "step": 1669 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9859978110209225e-05, - "loss": 0.9093, - "step": 1670 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9859645308844708e-05, - "loss": 0.8495, - "step": 1671 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9859312115246613e-05, - "loss": 0.7811, - "step": 1672 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9858978529428193e-05, - "loss": 0.9314, - "step": 1673 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9858644551402718e-05, - "loss": 0.8114, - "step": 1674 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.985831018118347e-05, - "loss": 0.7734, - "step": 1675 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.985797541878376e-05, - "loss": 0.9756, - "step": 1676 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9857640264216895e-05, - "loss": 0.7901, - "step": 1677 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9857304717496217e-05, - "loss": 0.8907, - "step": 1678 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.985696877863507e-05, - "loss": 0.8776, - "step": 1679 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.985663244764682e-05, - "loss": 0.8772, - "step": 1680 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9856295724544842e-05, - "loss": 0.8717, - "step": 1681 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9855958609342542e-05, - "loss": 0.8447, - "step": 1682 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.985562110205332e-05, - "loss": 0.8607, - "step": 1683 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9855283202690606e-05, - "loss": 0.9534, - "step": 1684 - }, - { - "epoch": 0.3, - "grad_norm": 0.0, - "learning_rate": 1.9854944911267842e-05, - "loss": 0.753, - "step": 1685 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.985460622779849e-05, - "loss": 0.9475, - "step": 1686 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9854267152296023e-05, - "loss": 0.8112, - "step": 1687 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9853927684773924e-05, - "loss": 0.8141, - "step": 1688 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9853587825245698e-05, - "loss": 0.7943, - "step": 1689 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9853247573724875e-05, - "loss": 0.8635, - "step": 1690 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9852906930224977e-05, - "loss": 0.7782, - "step": 1691 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.985256589475957e-05, - "loss": 0.8585, - "step": 1692 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9852224467342207e-05, - "loss": 0.9337, - "step": 1693 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.985188264798648e-05, - "loss": 0.8792, - "step": 1694 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.985154043670598e-05, - "loss": 0.7682, - "step": 1695 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9851197833514328e-05, - "loss": 0.7974, - "step": 1696 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9850854838425152e-05, - "loss": 0.8225, - "step": 1697 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9850511451452094e-05, - "loss": 0.8579, - "step": 1698 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9850167672608814e-05, - "loss": 0.7116, - "step": 1699 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984982350190899e-05, - "loss": 0.7403, - "step": 1700 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9849478939366315e-05, - "loss": 0.8577, - "step": 1701 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9849133984994492e-05, - "loss": 0.8398, - "step": 1702 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9848788638807247e-05, - "loss": 0.7735, - "step": 1703 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984844290081832e-05, - "loss": 0.7739, - "step": 1704 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9848096771041464e-05, - "loss": 0.7006, - "step": 1705 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9847750249490444e-05, - "loss": 0.7273, - "step": 1706 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984740333617905e-05, - "loss": 0.9213, - "step": 1707 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9847056031121083e-05, - "loss": 0.8306, - "step": 1708 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9846708334330358e-05, - "loss": 0.9112, - "step": 1709 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984636024582071e-05, - "loss": 0.7422, - "step": 1710 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984601176560598e-05, - "loss": 0.7365, - "step": 1711 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9845662893700032e-05, - "loss": 0.8493, - "step": 1712 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984531363011675e-05, - "loss": 0.9896, - "step": 1713 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984496397487003e-05, - "loss": 0.8388, - "step": 1714 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984461392797377e-05, - "loss": 0.8446, - "step": 1715 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984426348944191e-05, - "loss": 0.6996, - "step": 1716 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9843912659288384e-05, - "loss": 0.8432, - "step": 1717 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9843561437527145e-05, - "loss": 0.8887, - "step": 1718 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984320982417217e-05, - "loss": 0.968, - "step": 1719 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9842857819237448e-05, - "loss": 0.7691, - "step": 1720 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.984250542273698e-05, - "loss": 0.8703, - "step": 1721 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9842152634684782e-05, - "loss": 0.8816, - "step": 1722 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9841799455094893e-05, - "loss": 0.7893, - "step": 1723 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9841445883981364e-05, - "loss": 0.8142, - "step": 1724 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9841091921358256e-05, - "loss": 0.7573, - "step": 1725 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9840737567239654e-05, - "loss": 0.8326, - "step": 1726 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9840382821639653e-05, - "loss": 0.7292, - "step": 1727 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9840027684572364e-05, - "loss": 0.9953, - "step": 1728 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9839672156051918e-05, - "loss": 0.7585, - "step": 1729 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.983931623609246e-05, - "loss": 0.7303, - "step": 1730 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9838959924708144e-05, - "loss": 0.9722, - "step": 1731 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9838603221913147e-05, - "loss": 0.942, - "step": 1732 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9838246127721657e-05, - "loss": 1.0489, - "step": 1733 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9837888642147887e-05, - "loss": 0.8707, - "step": 1734 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.983753076520605e-05, - "loss": 0.8808, - "step": 1735 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.983717249691039e-05, - "loss": 0.9494, - "step": 1736 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9836813837275157e-05, - "loss": 0.842, - "step": 1737 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9836454786314613e-05, - "loss": 0.7295, - "step": 1738 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.983609534404305e-05, - "loss": 0.8193, - "step": 1739 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.983573551047476e-05, - "loss": 0.8281, - "step": 1740 - }, - { - "epoch": 0.31, - "grad_norm": 0.0, - "learning_rate": 1.9835375285624065e-05, - "loss": 0.8956, - "step": 1741 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9835014669505295e-05, - "loss": 0.773, - "step": 1742 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9834653662132787e-05, - "loss": 0.9194, - "step": 1743 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9834292263520914e-05, - "loss": 0.7886, - "step": 1744 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9833930473684045e-05, - "loss": 0.8631, - "step": 1745 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9833568292636576e-05, - "loss": 0.903, - "step": 1746 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9833205720392915e-05, - "loss": 0.7636, - "step": 1747 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9832842756967486e-05, - "loss": 0.7265, - "step": 1748 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9832479402374724e-05, - "loss": 0.8286, - "step": 1749 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.983211565662909e-05, - "loss": 0.9549, - "step": 1750 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.983175151974505e-05, - "loss": 0.904, - "step": 1751 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9831386991737094e-05, - "loss": 0.8053, - "step": 1752 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.983102207261972e-05, - "loss": 0.8911, - "step": 1753 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9830656762407447e-05, - "loss": 0.7411, - "step": 1754 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9830291061114807e-05, - "loss": 0.7549, - "step": 1755 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982992496875635e-05, - "loss": 0.9034, - "step": 1756 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9829558485346635e-05, - "loss": 0.7117, - "step": 1757 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9829191610900248e-05, - "loss": 0.8679, - "step": 1758 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9828824345431777e-05, - "loss": 0.8999, - "step": 1759 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9828456688955838e-05, - "loss": 1.0611, - "step": 1760 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9828088641487055e-05, - "loss": 0.8265, - "step": 1761 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9827720203040067e-05, - "loss": 0.738, - "step": 1762 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9827351373629535e-05, - "loss": 0.8289, - "step": 1763 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982698215327013e-05, - "loss": 0.9112, - "step": 1764 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9826612541976542e-05, - "loss": 0.9136, - "step": 1765 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982624253976347e-05, - "loss": 0.7798, - "step": 1766 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982587214664564e-05, - "loss": 0.8315, - "step": 1767 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982550136263778e-05, - "loss": 0.951, - "step": 1768 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9825130187754648e-05, - "loss": 0.7277, - "step": 1769 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9824758622011003e-05, - "loss": 0.8434, - "step": 1770 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982438666542163e-05, - "loss": 0.8879, - "step": 1771 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9824014318001326e-05, - "loss": 0.8345, - "step": 1772 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.98236415797649e-05, - "loss": 0.9711, - "step": 1773 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9823268450727186e-05, - "loss": 0.7433, - "step": 1774 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9822894930903022e-05, - "loss": 0.8949, - "step": 1775 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9822521020307272e-05, - "loss": 0.8683, - "step": 1776 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982214671895481e-05, - "loss": 0.7853, - "step": 1777 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9821772026860523e-05, - "loss": 0.8722, - "step": 1778 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982139694403932e-05, - "loss": 0.7161, - "step": 1779 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.982102147050612e-05, - "loss": 0.774, - "step": 1780 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9820645606275864e-05, - "loss": 0.8408, - "step": 1781 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9820269351363503e-05, - "loss": 0.8627, - "step": 1782 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9819892705784004e-05, - "loss": 0.8566, - "step": 1783 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9819515669552348e-05, - "loss": 0.773, - "step": 1784 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9819138242683537e-05, - "loss": 0.8854, - "step": 1785 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9818760425192587e-05, - "loss": 0.8545, - "step": 1786 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9818382217094528e-05, - "loss": 0.7813, - "step": 1787 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9818003618404403e-05, - "loss": 0.7379, - "step": 1788 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9817624629137273e-05, - "loss": 0.9322, - "step": 1789 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9817245249308217e-05, - "loss": 0.797, - "step": 1790 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.981686547893233e-05, - "loss": 0.8439, - "step": 1791 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.981648531802472e-05, - "loss": 0.815, - "step": 1792 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.98161047666005e-05, - "loss": 0.8232, - "step": 1793 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9815723824674822e-05, - "loss": 0.8478, - "step": 1794 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.981534249226283e-05, - "loss": 0.8255, - "step": 1795 - }, - { - "epoch": 0.32, - "grad_norm": 0.0, - "learning_rate": 1.9814960769379704e-05, - "loss": 0.868, - "step": 1796 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9814578656040622e-05, - "loss": 0.688, - "step": 1797 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.981419615226079e-05, - "loss": 0.991, - "step": 1798 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.981381325805542e-05, - "loss": 1.0093, - "step": 1799 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9813429973439748e-05, - "loss": 0.7778, - "step": 1800 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9813046298429017e-05, - "loss": 0.8911, - "step": 1801 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9812662233038497e-05, - "loss": 0.7833, - "step": 1802 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9812277777283464e-05, - "loss": 0.7315, - "step": 1803 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.981189293117921e-05, - "loss": 0.908, - "step": 1804 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9811507694741046e-05, - "loss": 0.8074, - "step": 1805 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.98111220679843e-05, - "loss": 0.7217, - "step": 1806 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9810736050924307e-05, - "loss": 0.9436, - "step": 1807 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.981034964357643e-05, - "loss": 0.8778, - "step": 1808 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9809962845956037e-05, - "loss": 0.9468, - "step": 1809 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980957565807852e-05, - "loss": 0.8479, - "step": 1810 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9809188079959274e-05, - "loss": 0.9256, - "step": 1811 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9808800111613724e-05, - "loss": 0.9429, - "step": 1812 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9808411753057303e-05, - "loss": 0.8907, - "step": 1813 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9808023004305455e-05, - "loss": 0.7731, - "step": 1814 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9807633865373654e-05, - "loss": 0.8815, - "step": 1815 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980724433627738e-05, - "loss": 0.7819, - "step": 1816 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980685441703212e-05, - "loss": 0.9386, - "step": 1817 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9806464107653394e-05, - "loss": 0.8008, - "step": 1818 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9806073408156723e-05, - "loss": 0.7385, - "step": 1819 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9805682318557656e-05, - "loss": 0.6436, - "step": 1820 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980529083887175e-05, - "loss": 0.759, - "step": 1821 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980489896911457e-05, - "loss": 0.7836, - "step": 1822 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9804506709301717e-05, - "loss": 0.8485, - "step": 1823 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9804114059448795e-05, - "loss": 0.7498, - "step": 1824 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9803721019571413e-05, - "loss": 0.9443, - "step": 1825 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9803327589685216e-05, - "loss": 0.7553, - "step": 1826 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9802933769805857e-05, - "loss": 0.809, - "step": 1827 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9802539559948998e-05, - "loss": 0.7614, - "step": 1828 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980214496013032e-05, - "loss": 0.7536, - "step": 1829 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9801749970365526e-05, - "loss": 0.8666, - "step": 1830 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9801354590670325e-05, - "loss": 0.7966, - "step": 1831 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980095882106045e-05, - "loss": 0.8773, - "step": 1832 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9800562661551642e-05, - "loss": 0.7938, - "step": 1833 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.980016611215966e-05, - "loss": 0.9003, - "step": 1834 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9799769172900283e-05, - "loss": 0.7104, - "step": 1835 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9799371843789302e-05, - "loss": 0.8436, - "step": 1836 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.979897412484252e-05, - "loss": 0.925, - "step": 1837 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9798576016075762e-05, - "loss": 0.8011, - "step": 1838 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9798177517504866e-05, - "loss": 0.8066, - "step": 1839 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.979777862914568e-05, - "loss": 0.8428, - "step": 1840 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9797379351014076e-05, - "loss": 1.0253, - "step": 1841 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9796979683125936e-05, - "loss": 0.8566, - "step": 1842 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9796579625497164e-05, - "loss": 0.8446, - "step": 1843 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.979617917814367e-05, - "loss": 0.8315, - "step": 1844 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9795778341081385e-05, - "loss": 0.8581, - "step": 1845 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.979537711432626e-05, - "loss": 0.8863, - "step": 1846 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9794975497894255e-05, - "loss": 0.7646, - "step": 1847 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.979457349180134e-05, - "loss": 0.877, - "step": 1848 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9794171096063516e-05, - "loss": 0.8574, - "step": 1849 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9793768310696783e-05, - "loss": 0.7401, - "step": 1850 - }, - { - "epoch": 0.33, - "grad_norm": 0.0, - "learning_rate": 1.9793365135717172e-05, - "loss": 0.8394, - "step": 1851 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9792961571140718e-05, - "loss": 0.9151, - "step": 1852 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9792557616983478e-05, - "loss": 0.8765, - "step": 1853 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.979215327326152e-05, - "loss": 0.8124, - "step": 1854 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.979174853999093e-05, - "loss": 0.8807, - "step": 1855 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.979134341718781e-05, - "loss": 0.7482, - "step": 1856 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.979093790486827e-05, - "loss": 0.8051, - "step": 1857 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9790532003048452e-05, - "loss": 0.7944, - "step": 1858 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.97901257117445e-05, - "loss": 0.8268, - "step": 1859 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9789719030972573e-05, - "loss": 0.7552, - "step": 1860 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9789311960748852e-05, - "loss": 0.9766, - "step": 1861 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9788904501089534e-05, - "loss": 0.8417, - "step": 1862 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9788496652010822e-05, - "loss": 0.827, - "step": 1863 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9788088413528944e-05, - "loss": 0.928, - "step": 1864 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9787679785660145e-05, - "loss": 0.7932, - "step": 1865 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9787270768420674e-05, - "loss": 0.8199, - "step": 1866 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9786861361826805e-05, - "loss": 0.8719, - "step": 1867 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9786451565894828e-05, - "loss": 0.8183, - "step": 1868 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978604138064104e-05, - "loss": 0.6385, - "step": 1869 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9785630806081765e-05, - "loss": 0.7868, - "step": 1870 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978521984223333e-05, - "loss": 1.0968, - "step": 1871 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9784808489112087e-05, - "loss": 0.8228, - "step": 1872 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.97843967467344e-05, - "loss": 0.8322, - "step": 1873 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978398461511665e-05, - "loss": 0.8155, - "step": 1874 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978357209427523e-05, - "loss": 0.8321, - "step": 1875 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978315918422655e-05, - "loss": 0.8322, - "step": 1876 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978274588498704e-05, - "loss": 0.8489, - "step": 1877 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978233219657314e-05, - "loss": 0.8621, - "step": 1878 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9781918119001306e-05, - "loss": 0.8698, - "step": 1879 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9781503652288013e-05, - "loss": 0.8194, - "step": 1880 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.978108879644975e-05, - "loss": 0.7946, - "step": 1881 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9780673551503014e-05, - "loss": 0.8091, - "step": 1882 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9780257917464333e-05, - "loss": 0.7578, - "step": 1883 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977984189435024e-05, - "loss": 0.9149, - "step": 1884 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9779425482177277e-05, - "loss": 0.7765, - "step": 1885 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9779008680962018e-05, - "loss": 0.7428, - "step": 1886 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9778591490721036e-05, - "loss": 0.9341, - "step": 1887 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977817391147094e-05, - "loss": 0.7911, - "step": 1888 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977775594322833e-05, - "loss": 0.9017, - "step": 1889 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977733758600984e-05, - "loss": 0.8573, - "step": 1890 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9776918839832113e-05, - "loss": 0.799, - "step": 1891 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9776499704711805e-05, - "loss": 0.7803, - "step": 1892 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9776080180665593e-05, - "loss": 0.7608, - "step": 1893 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977566026771016e-05, - "loss": 0.8927, - "step": 1894 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9775239965862217e-05, - "loss": 1.0257, - "step": 1895 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977481927513848e-05, - "loss": 0.7954, - "step": 1896 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977439819555569e-05, - "loss": 0.8727, - "step": 1897 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9773976727130594e-05, - "loss": 0.8818, - "step": 1898 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9773554869879957e-05, - "loss": 0.8577, - "step": 1899 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9773132623820568e-05, - "loss": 0.8978, - "step": 1900 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977270998896922e-05, - "loss": 0.8259, - "step": 1901 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9772286965342727e-05, - "loss": 0.7591, - "step": 1902 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9771863552957916e-05, - "loss": 0.7954, - "step": 1903 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.9771439751831636e-05, - "loss": 0.8611, - "step": 1904 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977101556198074e-05, - "loss": 0.8069, - "step": 1905 - }, - { - "epoch": 0.34, - "grad_norm": 0.0, - "learning_rate": 1.977059098342211e-05, - "loss": 0.8814, - "step": 1906 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.977016601617263e-05, - "loss": 0.8358, - "step": 1907 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.976974066024921e-05, - "loss": 0.8144, - "step": 1908 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.976931491566877e-05, - "loss": 0.8804, - "step": 1909 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9768888782448247e-05, - "loss": 0.7428, - "step": 1910 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9768462260604592e-05, - "loss": 0.8157, - "step": 1911 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9768035350154774e-05, - "loss": 0.6446, - "step": 1912 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9767608051115777e-05, - "loss": 0.8921, - "step": 1913 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9767180363504597e-05, - "loss": 0.863, - "step": 1914 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9766752287338252e-05, - "loss": 0.7637, - "step": 1915 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.976632382263377e-05, - "loss": 0.8049, - "step": 1916 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9765894969408195e-05, - "loss": 0.8795, - "step": 1917 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.976546572767859e-05, - "loss": 0.8109, - "step": 1918 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9765036097462026e-05, - "loss": 0.9285, - "step": 1919 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.97646060787756e-05, - "loss": 0.8329, - "step": 1920 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9764175671636413e-05, - "loss": 0.8351, - "step": 1921 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9763744876061595e-05, - "loss": 0.7231, - "step": 1922 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9763313692068276e-05, - "loss": 0.7729, - "step": 1923 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9762882119673617e-05, - "loss": 0.7316, - "step": 1924 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9762450158894778e-05, - "loss": 0.7459, - "step": 1925 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9762017809748954e-05, - "loss": 0.7208, - "step": 1926 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9761585072253333e-05, - "loss": 0.9176, - "step": 1927 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9761151946425134e-05, - "loss": 0.8072, - "step": 1928 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9760718432281592e-05, - "loss": 0.8553, - "step": 1929 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.976028452983995e-05, - "loss": 0.7961, - "step": 1930 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9759850239117466e-05, - "loss": 0.7929, - "step": 1931 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9759415560131426e-05, - "loss": 0.7728, - "step": 1932 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9758980492899108e-05, - "loss": 0.8311, - "step": 1933 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9758545037437833e-05, - "loss": 0.8091, - "step": 1934 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975810919376492e-05, - "loss": 0.8749, - "step": 1935 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9757672961897705e-05, - "loss": 0.9115, - "step": 1936 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9757236341853542e-05, - "loss": 0.9164, - "step": 1937 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9756799333649806e-05, - "loss": 0.8401, - "step": 1938 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9756361937303877e-05, - "loss": 0.937, - "step": 1939 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9755924152833154e-05, - "loss": 0.721, - "step": 1940 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9755485980255055e-05, - "loss": 0.9136, - "step": 1941 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975504741958702e-05, - "loss": 0.8357, - "step": 1942 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9754608470846477e-05, - "loss": 0.7185, - "step": 1943 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9754169134050902e-05, - "loss": 0.8084, - "step": 1944 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975372940921777e-05, - "loss": 0.8388, - "step": 1945 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9753289296364573e-05, - "loss": 0.8192, - "step": 1946 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975284879550882e-05, - "loss": 0.8367, - "step": 1947 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975240790666803e-05, - "loss": 0.7857, - "step": 1948 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.975196662985975e-05, - "loss": 0.6948, - "step": 1949 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9751524965101535e-05, - "loss": 0.7908, - "step": 1950 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9751082912410947e-05, - "loss": 0.8495, - "step": 1951 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9750640471805578e-05, - "loss": 0.8775, - "step": 1952 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9750197643303025e-05, - "loss": 0.8358, - "step": 1953 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.974975442692091e-05, - "loss": 0.7276, - "step": 1954 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.974931082267686e-05, - "loss": 0.7646, - "step": 1955 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9748866830588525e-05, - "loss": 0.8066, - "step": 1956 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9748422450673565e-05, - "loss": 0.8549, - "step": 1957 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.974797768294966e-05, - "loss": 0.8971, - "step": 1958 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9747532527434505e-05, - "loss": 0.8606, - "step": 1959 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9747086984145807e-05, - "loss": 0.9694, - "step": 1960 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.974664105310129e-05, - "loss": 0.7848, - "step": 1961 - }, - { - "epoch": 0.35, - "grad_norm": 0.0, - "learning_rate": 1.9746194734318694e-05, - "loss": 0.6783, - "step": 1962 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974574802781578e-05, - "loss": 0.8949, - "step": 1963 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9745300933610308e-05, - "loss": 0.7227, - "step": 1964 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9744853451720075e-05, - "loss": 0.7351, - "step": 1965 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9744405582162875e-05, - "loss": 0.7547, - "step": 1966 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974395732495653e-05, - "loss": 0.6924, - "step": 1967 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974350868011887e-05, - "loss": 0.8145, - "step": 1968 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9743059647667743e-05, - "loss": 0.8809, - "step": 1969 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974261022762101e-05, - "loss": 0.7411, - "step": 1970 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9742160419996556e-05, - "loss": 0.7719, - "step": 1971 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974171022481227e-05, - "loss": 0.8811, - "step": 1972 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9741259642086063e-05, - "loss": 0.7351, - "step": 1973 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.974080867183586e-05, - "loss": 0.7242, - "step": 1974 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.97403573140796e-05, - "loss": 0.8759, - "step": 1975 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9739905568835236e-05, - "loss": 0.9524, - "step": 1976 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973945343612075e-05, - "loss": 0.8428, - "step": 1977 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973900091595412e-05, - "loss": 0.7554, - "step": 1978 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9738548008353343e-05, - "loss": 0.992, - "step": 1979 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973809471333645e-05, - "loss": 0.7732, - "step": 1980 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9737641030921467e-05, - "loss": 0.7882, - "step": 1981 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973718696112644e-05, - "loss": 0.7125, - "step": 1982 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9736732503969436e-05, - "loss": 0.7759, - "step": 1983 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9736277659468533e-05, - "loss": 0.7624, - "step": 1984 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9735822427641828e-05, - "loss": 0.8438, - "step": 1985 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9735366808507423e-05, - "loss": 0.8669, - "step": 1986 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973491080208345e-05, - "loss": 0.8493, - "step": 1987 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973445440838805e-05, - "loss": 0.8271, - "step": 1988 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973399762743938e-05, - "loss": 0.822, - "step": 1989 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9733540459255605e-05, - "loss": 0.9187, - "step": 1990 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9733082903854923e-05, - "loss": 0.813, - "step": 1991 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973262496125552e-05, - "loss": 0.8607, - "step": 1992 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973216663147563e-05, - "loss": 0.8346, - "step": 1993 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9731707914533476e-05, - "loss": 0.809, - "step": 1994 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.973124881044731e-05, - "loss": 0.8014, - "step": 1995 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9730789319235395e-05, - "loss": 0.8147, - "step": 1996 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9730329440916013e-05, - "loss": 0.8575, - "step": 1997 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9729869175507457e-05, - "loss": 0.8739, - "step": 1998 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9729408523028035e-05, - "loss": 0.9034, - "step": 1999 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9728947483496078e-05, - "loss": 0.6697, - "step": 2000 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.972848605692992e-05, - "loss": 0.8904, - "step": 2001 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.972802424334792e-05, - "loss": 0.8779, - "step": 2002 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9727562042768452e-05, - "loss": 0.7122, - "step": 2003 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9727099455209906e-05, - "loss": 0.9545, - "step": 2004 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9726636480690676e-05, - "loss": 0.7307, - "step": 2005 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9726173119229185e-05, - "loss": 0.8768, - "step": 2006 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9725709370843863e-05, - "loss": 0.7188, - "step": 2007 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9725245235553166e-05, - "loss": 0.79, - "step": 2008 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.972478071337555e-05, - "loss": 0.9243, - "step": 2009 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9724315804329498e-05, - "loss": 0.8315, - "step": 2010 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9723850508433508e-05, - "loss": 0.8552, - "step": 2011 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9723384825706082e-05, - "loss": 0.8252, - "step": 2012 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9722918756165753e-05, - "loss": 0.7949, - "step": 2013 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9722452299831057e-05, - "loss": 0.7753, - "step": 2014 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9721985456720556e-05, - "loss": 0.8304, - "step": 2015 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.9721518226852815e-05, - "loss": 0.8923, - "step": 2016 - }, - { - "epoch": 0.36, - "grad_norm": 0.0, - "learning_rate": 1.972105061024643e-05, - "loss": 0.8392, - "step": 2017 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9720582606919998e-05, - "loss": 0.8189, - "step": 2018 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9720114216892137e-05, - "loss": 0.8536, - "step": 2019 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9719645440181477e-05, - "loss": 0.8649, - "step": 2020 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9719176276806674e-05, - "loss": 0.9224, - "step": 2021 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9718706726786393e-05, - "loss": 0.7784, - "step": 2022 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.97182367901393e-05, - "loss": 0.858, - "step": 2023 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9717766466884106e-05, - "loss": 0.7924, - "step": 2024 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9717295757039513e-05, - "loss": 0.9587, - "step": 2025 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9716824660624248e-05, - "loss": 0.83, - "step": 2026 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9716353177657054e-05, - "loss": 0.7234, - "step": 2027 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9715881308156685e-05, - "loss": 1.0402, - "step": 2028 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971540905214191e-05, - "loss": 0.7855, - "step": 2029 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971493640963152e-05, - "loss": 0.8998, - "step": 2030 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971446338064432e-05, - "loss": 0.8768, - "step": 2031 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9713989965199123e-05, - "loss": 0.904, - "step": 2032 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9713516163314766e-05, - "loss": 0.7617, - "step": 2033 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9713041975010093e-05, - "loss": 0.9395, - "step": 2034 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971256740030397e-05, - "loss": 0.8998, - "step": 2035 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971209243921528e-05, - "loss": 0.8811, - "step": 2036 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9711617091762917e-05, - "loss": 0.7315, - "step": 2037 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9711141357965787e-05, - "loss": 0.962, - "step": 2038 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9710665237842818e-05, - "loss": 0.8133, - "step": 2039 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.971018873141295e-05, - "loss": 0.8637, - "step": 2040 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9709711838695137e-05, - "loss": 0.7776, - "step": 2041 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9709234559708357e-05, - "loss": 0.8457, - "step": 2042 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9708756894471593e-05, - "loss": 0.7358, - "step": 2043 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9708278843003848e-05, - "loss": 0.8425, - "step": 2044 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.970780040532414e-05, - "loss": 0.7976, - "step": 2045 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.97073215814515e-05, - "loss": 0.7356, - "step": 2046 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9706842371404978e-05, - "loss": 0.9149, - "step": 2047 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9706362775203636e-05, - "loss": 0.8472, - "step": 2048 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9705882792866557e-05, - "loss": 0.7362, - "step": 2049 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9705402424412833e-05, - "loss": 0.78, - "step": 2050 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9704921669861575e-05, - "loss": 0.8921, - "step": 2051 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9704440529231905e-05, - "loss": 0.7205, - "step": 2052 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.970395900254297e-05, - "loss": 0.906, - "step": 2053 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.970347708981392e-05, - "loss": 0.9379, - "step": 2054 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.970299479106393e-05, - "loss": 0.6815, - "step": 2055 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9702512106312182e-05, - "loss": 0.8369, - "step": 2056 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9702029035577882e-05, - "loss": 0.7438, - "step": 2057 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9701545578880247e-05, - "loss": 0.8386, - "step": 2058 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.970106173623851e-05, - "loss": 0.9338, - "step": 2059 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9700577507671917e-05, - "loss": 1.0703, - "step": 2060 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9700092893199732e-05, - "loss": 0.8507, - "step": 2061 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.969960789284124e-05, - "loss": 0.7933, - "step": 2062 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9699122506615724e-05, - "loss": 0.892, - "step": 2063 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.96986367345425e-05, - "loss": 0.8723, - "step": 2064 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9698150576640895e-05, - "loss": 0.9457, - "step": 2065 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9697664032930243e-05, - "loss": 0.9118, - "step": 2066 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9697177103429904e-05, - "loss": 0.8402, - "step": 2067 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9696689788159245e-05, - "loss": 0.8662, - "step": 2068 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.969620208713766e-05, - "loss": 0.7292, - "step": 2069 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.9695714000384543e-05, - "loss": 0.793, - "step": 2070 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.969522552791931e-05, - "loss": 0.7448, - "step": 2071 - }, - { - "epoch": 0.37, - "grad_norm": 0.0, - "learning_rate": 1.96947366697614e-05, - "loss": 0.8365, - "step": 2072 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9694247425930257e-05, - "loss": 0.8091, - "step": 2073 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9693757796445345e-05, - "loss": 0.8479, - "step": 2074 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.969326778132614e-05, - "loss": 0.8846, - "step": 2075 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.969277738059214e-05, - "loss": 0.8161, - "step": 2076 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.969228659426285e-05, - "loss": 0.8795, - "step": 2077 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.969179542235779e-05, - "loss": 0.6855, - "step": 2078 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9691303864896514e-05, - "loss": 0.8328, - "step": 2079 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9690811921898562e-05, - "loss": 0.846, - "step": 2080 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9690319593383512e-05, - "loss": 0.8093, - "step": 2081 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.968982687937095e-05, - "loss": 0.8539, - "step": 2082 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.968933377988047e-05, - "loss": 0.8182, - "step": 2083 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9688840294931698e-05, - "loss": 0.8625, - "step": 2084 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9688346424544262e-05, - "loss": 0.8112, - "step": 2085 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9687852168737803e-05, - "loss": 0.9887, - "step": 2086 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9687357527531997e-05, - "loss": 0.8481, - "step": 2087 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9686862500946505e-05, - "loss": 0.9423, - "step": 2088 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9686367089001033e-05, - "loss": 0.8145, - "step": 2089 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9685871291715282e-05, - "loss": 0.9106, - "step": 2090 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9685375109108978e-05, - "loss": 0.8458, - "step": 2091 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9684878541201864e-05, - "loss": 0.8509, - "step": 2092 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9684381588013686e-05, - "loss": 0.9524, - "step": 2093 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.968388424956422e-05, - "loss": 0.846, - "step": 2094 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.968338652587325e-05, - "loss": 0.8261, - "step": 2095 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9682888416960572e-05, - "loss": 0.7836, - "step": 2096 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9682389922846007e-05, - "loss": 0.8157, - "step": 2097 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9681891043549387e-05, - "loss": 0.7981, - "step": 2098 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9681391779090554e-05, - "loss": 0.8459, - "step": 2099 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9680892129489367e-05, - "loss": 0.8902, - "step": 2100 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.968039209476571e-05, - "loss": 0.902, - "step": 2101 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967989167493947e-05, - "loss": 0.7478, - "step": 2102 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967939087003056e-05, - "loss": 0.9596, - "step": 2103 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9678889680058896e-05, - "loss": 0.9693, - "step": 2104 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9678388105044423e-05, - "loss": 0.8281, - "step": 2105 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967788614500709e-05, - "loss": 0.6238, - "step": 2106 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9677383799966866e-05, - "loss": 0.7582, - "step": 2107 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9676881069943738e-05, - "loss": 0.7337, - "step": 2108 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.96763779549577e-05, - "loss": 0.7806, - "step": 2109 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9675874455028774e-05, - "loss": 0.6891, - "step": 2110 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9675370570176985e-05, - "loss": 0.9519, - "step": 2111 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967486630042238e-05, - "loss": 0.8535, - "step": 2112 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967436164578502e-05, - "loss": 0.7928, - "step": 2113 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967385660628498e-05, - "loss": 0.861, - "step": 2114 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967335118194235e-05, - "loss": 0.7767, - "step": 2115 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9672845372777244e-05, - "loss": 0.8857, - "step": 2116 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9672339178809775e-05, - "loss": 0.8488, - "step": 2117 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9671832600060083e-05, - "loss": 0.7889, - "step": 2118 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9671325636548324e-05, - "loss": 0.9142, - "step": 2119 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967081828829466e-05, - "loss": 0.774, - "step": 2120 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.967031055531928e-05, - "loss": 0.6764, - "step": 2121 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.966980243764238e-05, - "loss": 0.8311, - "step": 2122 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9669293935284174e-05, - "loss": 0.8932, - "step": 2123 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.966878504826489e-05, - "loss": 0.8455, - "step": 2124 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9668275776604774e-05, - "loss": 0.8324, - "step": 2125 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9667766120324085e-05, - "loss": 0.854, - "step": 2126 - }, - { - "epoch": 0.38, - "grad_norm": 0.0, - "learning_rate": 1.9667256079443095e-05, - "loss": 0.7033, - "step": 2127 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.96667456539821e-05, - "loss": 0.9179, - "step": 2128 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.96662348439614e-05, - "loss": 0.8007, - "step": 2129 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9665723649401323e-05, - "loss": 0.7798, - "step": 2130 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.96652120703222e-05, - "loss": 0.8905, - "step": 2131 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.966470010674438e-05, - "loss": 0.8313, - "step": 2132 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9664187758688236e-05, - "loss": 0.7484, - "step": 2133 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9663675026174146e-05, - "loss": 0.759, - "step": 2134 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.966316190922251e-05, - "loss": 0.8692, - "step": 2135 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.966264840785374e-05, - "loss": 0.9253, - "step": 2136 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9662134522088258e-05, - "loss": 0.7228, - "step": 2137 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9661620251946518e-05, - "loss": 0.8875, - "step": 2138 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9661105597448974e-05, - "loss": 0.824, - "step": 2139 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9660590558616096e-05, - "loss": 1.0195, - "step": 2140 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9660075135468375e-05, - "loss": 0.8418, - "step": 2141 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9659559328026318e-05, - "loss": 0.6264, - "step": 2142 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9659043136310444e-05, - "loss": 0.7681, - "step": 2143 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9658526560341286e-05, - "loss": 0.782, - "step": 2144 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9658009600139396e-05, - "loss": 0.8421, - "step": 2145 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.965749225572534e-05, - "loss": 0.8321, - "step": 2146 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9656974527119693e-05, - "loss": 0.8633, - "step": 2147 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9656456414343063e-05, - "loss": 0.8578, - "step": 2148 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.965593791741605e-05, - "loss": 0.7937, - "step": 2149 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.965541903635929e-05, - "loss": 0.7928, - "step": 2150 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9654899771193413e-05, - "loss": 0.8692, - "step": 2151 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9654380121939088e-05, - "loss": 0.7876, - "step": 2152 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9653860088616982e-05, - "loss": 0.793, - "step": 2153 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9653339671247783e-05, - "loss": 0.8453, - "step": 2154 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9652818869852196e-05, - "loss": 0.7414, - "step": 2155 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9652297684450936e-05, - "loss": 0.8974, - "step": 2156 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.965177611506474e-05, - "loss": 0.8246, - "step": 2157 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9651254161714358e-05, - "loss": 0.89, - "step": 2158 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.965073182442055e-05, - "loss": 0.8806, - "step": 2159 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9650209103204095e-05, - "loss": 0.8445, - "step": 2160 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964968599808579e-05, - "loss": 0.8996, - "step": 2161 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9649162509086447e-05, - "loss": 0.715, - "step": 2162 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9648638636226895e-05, - "loss": 0.8926, - "step": 2163 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964811437952796e-05, - "loss": 1.0033, - "step": 2164 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964758973901051e-05, - "loss": 0.8381, - "step": 2165 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9647064714695415e-05, - "loss": 0.9154, - "step": 2166 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9646539306603557e-05, - "loss": 0.8892, - "step": 2167 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9646013514755844e-05, - "loss": 0.8226, - "step": 2168 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9645487339173183e-05, - "loss": 0.8029, - "step": 2169 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9644960779876516e-05, - "loss": 0.7171, - "step": 2170 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9644433836886785e-05, - "loss": 0.8858, - "step": 2171 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9643906510224956e-05, - "loss": 0.7498, - "step": 2172 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9643378799912002e-05, - "loss": 0.6862, - "step": 2173 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964285070596892e-05, - "loss": 0.9607, - "step": 2174 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964232222841672e-05, - "loss": 0.7203, - "step": 2175 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9641793367276423e-05, - "loss": 0.7908, - "step": 2176 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964126412256907e-05, - "loss": 0.835, - "step": 2177 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.964073449431571e-05, - "loss": 0.7912, - "step": 2178 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9640204482537417e-05, - "loss": 0.9142, - "step": 2179 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9639674087255276e-05, - "loss": 0.7428, - "step": 2180 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9639143308490386e-05, - "loss": 0.8435, - "step": 2181 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9638612146263862e-05, - "loss": 0.8307, - "step": 2182 - }, - { - "epoch": 0.39, - "grad_norm": 0.0, - "learning_rate": 1.9638080600596836e-05, - "loss": 0.6964, - "step": 2183 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9637548671510452e-05, - "loss": 0.7348, - "step": 2184 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.963701635902587e-05, - "loss": 0.7882, - "step": 2185 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.963648366316427e-05, - "loss": 0.8389, - "step": 2186 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9635950583946844e-05, - "loss": 0.8413, - "step": 2187 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9635417121394792e-05, - "loss": 0.7297, - "step": 2188 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9634883275529346e-05, - "loss": 0.8065, - "step": 2189 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9634349046371734e-05, - "loss": 0.8416, - "step": 2190 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9633814433943215e-05, - "loss": 0.7447, - "step": 2191 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9633279438265052e-05, - "loss": 0.8143, - "step": 2192 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9632744059358527e-05, - "loss": 1.006, - "step": 2193 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.963220829724495e-05, - "loss": 0.85, - "step": 2194 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.963167215194562e-05, - "loss": 0.94, - "step": 2195 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9631135623481872e-05, - "loss": 0.8491, - "step": 2196 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.963059871187505e-05, - "loss": 0.7844, - "step": 2197 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9630061417146512e-05, - "loss": 0.6899, - "step": 2198 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9629523739317636e-05, - "loss": 0.8566, - "step": 2199 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962898567840981e-05, - "loss": 0.8666, - "step": 2200 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9628447234444437e-05, - "loss": 0.7289, - "step": 2201 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962790840744294e-05, - "loss": 0.8281, - "step": 2202 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962736919742675e-05, - "loss": 0.7252, - "step": 2203 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962682960441732e-05, - "loss": 0.9602, - "step": 2204 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962628962843612e-05, - "loss": 0.8152, - "step": 2205 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9625749269504628e-05, - "loss": 0.7573, - "step": 2206 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962520852764434e-05, - "loss": 0.9222, - "step": 2207 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9624667402876766e-05, - "loss": 0.9686, - "step": 2208 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9624125895223436e-05, - "loss": 0.8655, - "step": 2209 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962358400470589e-05, - "loss": 0.8144, - "step": 2210 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9623041731345686e-05, - "loss": 0.815, - "step": 2211 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9622499075164397e-05, - "loss": 0.6738, - "step": 2212 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962195603618361e-05, - "loss": 0.857, - "step": 2213 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9621412614424935e-05, - "loss": 0.8693, - "step": 2214 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9620868809909976e-05, - "loss": 0.8717, - "step": 2215 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.962032462266038e-05, - "loss": 0.9437, - "step": 2216 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9619780052697782e-05, - "loss": 0.8796, - "step": 2217 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.961923510004386e-05, - "loss": 0.6625, - "step": 2218 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9618689764720287e-05, - "loss": 0.9776, - "step": 2219 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9618144046748756e-05, - "loss": 0.7669, - "step": 2220 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9617597946150976e-05, - "loss": 0.7668, - "step": 2221 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9617051462948678e-05, - "loss": 0.896, - "step": 2222 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9616504597163593e-05, - "loss": 0.8543, - "step": 2223 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9615957348817484e-05, - "loss": 0.7333, - "step": 2224 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9615409717932122e-05, - "loss": 0.8242, - "step": 2225 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9614861704529284e-05, - "loss": 0.721, - "step": 2226 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9614313308630778e-05, - "loss": 0.8001, - "step": 2227 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.961376453025842e-05, - "loss": 0.8664, - "step": 2228 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9613215369434038e-05, - "loss": 0.8499, - "step": 2229 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.961266582617948e-05, - "loss": 0.9073, - "step": 2230 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.961211590051661e-05, - "loss": 0.728, - "step": 2231 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.96115655924673e-05, - "loss": 0.7184, - "step": 2232 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9611014902053447e-05, - "loss": 0.9429, - "step": 2233 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9610463829296955e-05, - "loss": 0.8042, - "step": 2234 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9609912374219752e-05, - "loss": 0.9396, - "step": 2235 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.960936053684377e-05, - "loss": 0.8119, - "step": 2236 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.960880831719096e-05, - "loss": 0.9423, - "step": 2237 - }, - { - "epoch": 0.4, - "grad_norm": 0.0, - "learning_rate": 1.9608255715283298e-05, - "loss": 0.8582, - "step": 2238 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9607702731142764e-05, - "loss": 0.9805, - "step": 2239 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9607149364791356e-05, - "loss": 0.9358, - "step": 2240 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.960659561625109e-05, - "loss": 0.9579, - "step": 2241 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9606041485543992e-05, - "loss": 0.8653, - "step": 2242 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9605486972692107e-05, - "loss": 0.8168, - "step": 2243 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.96049320777175e-05, - "loss": 0.7297, - "step": 2244 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9604376800642233e-05, - "loss": 1.0315, - "step": 2245 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9603821141488408e-05, - "loss": 0.7696, - "step": 2246 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9603265100278123e-05, - "loss": 0.8546, - "step": 2247 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9602708677033506e-05, - "loss": 0.7788, - "step": 2248 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9602151871776683e-05, - "loss": 0.919, - "step": 2249 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.960159468452981e-05, - "loss": 0.8621, - "step": 2250 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.960103711531505e-05, - "loss": 1.0234, - "step": 2251 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.960047916415459e-05, - "loss": 0.7217, - "step": 2252 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.959992083107062e-05, - "loss": 0.7738, - "step": 2253 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9599362116085357e-05, - "loss": 0.7309, - "step": 2254 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9598803019221022e-05, - "loss": 0.818, - "step": 2255 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9598243540499858e-05, - "loss": 0.8007, - "step": 2256 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9597683679944125e-05, - "loss": 0.9371, - "step": 2257 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9597123437576094e-05, - "loss": 0.778, - "step": 2258 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.959656281341805e-05, - "loss": 0.7961, - "step": 2259 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.95960018074923e-05, - "loss": 0.9504, - "step": 2260 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9595440419821155e-05, - "loss": 0.8757, - "step": 2261 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9594878650426955e-05, - "loss": 0.7658, - "step": 2262 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.959431649933205e-05, - "loss": 0.8322, - "step": 2263 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.959375396655879e-05, - "loss": 0.9006, - "step": 2264 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9593191052129565e-05, - "loss": 0.7955, - "step": 2265 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9592627756066765e-05, - "loss": 0.8244, - "step": 2266 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9592064078392804e-05, - "loss": 0.7757, - "step": 2267 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.95915000191301e-05, - "loss": 0.8025, - "step": 2268 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9590935578301094e-05, - "loss": 0.7713, - "step": 2269 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.959037075592824e-05, - "loss": 0.9525, - "step": 2270 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9589805552034008e-05, - "loss": 0.9242, - "step": 2271 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9589239966640883e-05, - "loss": 0.9033, - "step": 2272 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9588673999771365e-05, - "loss": 0.9357, - "step": 2273 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9588107651447967e-05, - "loss": 0.7876, - "step": 2274 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9587540921693224e-05, - "loss": 0.8006, - "step": 2275 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.958697381052968e-05, - "loss": 0.7523, - "step": 2276 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.958640631797989e-05, - "loss": 0.8144, - "step": 2277 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9585838444066437e-05, - "loss": 0.7449, - "step": 2278 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9585270188811913e-05, - "loss": 0.8243, - "step": 2279 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9584701552238914e-05, - "loss": 1.0367, - "step": 2280 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.958413253437007e-05, - "loss": 0.8887, - "step": 2281 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9583563135228013e-05, - "loss": 0.8297, - "step": 2282 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.95829933548354e-05, - "loss": 0.7912, - "step": 2283 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9582423193214897e-05, - "loss": 0.7442, - "step": 2284 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9581852650389178e-05, - "loss": 0.8591, - "step": 2285 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.958128172638095e-05, - "loss": 0.7924, - "step": 2286 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9580710421212918e-05, - "loss": 0.9051, - "step": 2287 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9580138734907818e-05, - "loss": 0.7227, - "step": 2288 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9579566667488385e-05, - "loss": 0.7646, - "step": 2289 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9578994218977377e-05, - "loss": 0.9351, - "step": 2290 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.957842138939757e-05, - "loss": 0.8564, - "step": 2291 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9577848178771754e-05, - "loss": 0.8996, - "step": 2292 - }, - { - "epoch": 0.41, - "grad_norm": 0.0, - "learning_rate": 1.9577274587122726e-05, - "loss": 0.7265, - "step": 2293 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.957670061447331e-05, - "loss": 0.977, - "step": 2294 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.957612626084634e-05, - "loss": 0.9148, - "step": 2295 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.957555152626466e-05, - "loss": 0.9244, - "step": 2296 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9574976410751137e-05, - "loss": 1.0937, - "step": 2297 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9574400914328648e-05, - "loss": 0.6961, - "step": 2298 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9573825037020087e-05, - "loss": 0.7938, - "step": 2299 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.957324877884837e-05, - "loss": 0.913, - "step": 2300 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9572672139836415e-05, - "loss": 0.767, - "step": 2301 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9572095120007165e-05, - "loss": 0.7459, - "step": 2302 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.957151771938357e-05, - "loss": 0.8907, - "step": 2303 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9570939937988605e-05, - "loss": 0.8047, - "step": 2304 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9570361775845254e-05, - "loss": 0.7709, - "step": 2305 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9569783232976514e-05, - "loss": 0.8497, - "step": 2306 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9569204309405404e-05, - "loss": 0.665, - "step": 2307 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9568625005154954e-05, - "loss": 0.7146, - "step": 2308 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9568045320248212e-05, - "loss": 0.8359, - "step": 2309 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9567465254708235e-05, - "loss": 0.8214, - "step": 2310 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.95668848085581e-05, - "loss": 0.7891, - "step": 2311 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9566303981820894e-05, - "loss": 0.8506, - "step": 2312 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9565722774519732e-05, - "loss": 0.8136, - "step": 2313 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.956514118667773e-05, - "loss": 0.7857, - "step": 2314 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9564559218318026e-05, - "loss": 0.9065, - "step": 2315 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9563976869463768e-05, - "loss": 0.7591, - "step": 2316 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9563394140138128e-05, - "loss": 0.8654, - "step": 2317 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.956281103036429e-05, - "loss": 0.8881, - "step": 2318 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9562227540165444e-05, - "loss": 0.8161, - "step": 2319 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9561643669564803e-05, - "loss": 0.8059, - "step": 2320 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9561059418585603e-05, - "loss": 0.8237, - "step": 2321 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9560474787251075e-05, - "loss": 0.728, - "step": 2322 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9559889775584484e-05, - "loss": 0.8929, - "step": 2323 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.95593043836091e-05, - "loss": 0.8044, - "step": 2324 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9558718611348214e-05, - "loss": 0.8321, - "step": 2325 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9558132458825123e-05, - "loss": 0.8248, - "step": 2326 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.955754592606315e-05, - "loss": 0.882, - "step": 2327 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.955695901308563e-05, - "loss": 0.7955, - "step": 2328 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9556371719915906e-05, - "loss": 0.8792, - "step": 2329 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9555784046577345e-05, - "loss": 0.8209, - "step": 2330 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.955519599309332e-05, - "loss": 0.9491, - "step": 2331 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9554607559487237e-05, - "loss": 0.924, - "step": 2332 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9554018745782493e-05, - "loss": 0.9204, - "step": 2333 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.955342955200252e-05, - "loss": 0.8554, - "step": 2334 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9552839978170748e-05, - "loss": 0.8278, - "step": 2335 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.955225002431064e-05, - "loss": 0.7423, - "step": 2336 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9551659690445662e-05, - "loss": 0.8969, - "step": 2337 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9551068976599294e-05, - "loss": 0.7748, - "step": 2338 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9550477882795043e-05, - "loss": 0.8162, - "step": 2339 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9549886409056428e-05, - "loss": 0.8049, - "step": 2340 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9549294555406964e-05, - "loss": 0.8647, - "step": 2341 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9548702321870206e-05, - "loss": 0.6867, - "step": 2342 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9548109708469712e-05, - "loss": 0.7254, - "step": 2343 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9547516715229053e-05, - "loss": 0.8615, - "step": 2344 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9546923342171828e-05, - "loss": 0.8409, - "step": 2345 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9546329589321637e-05, - "loss": 0.8949, - "step": 2346 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.95457354567021e-05, - "loss": 0.8831, - "step": 2347 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.9545140944336856e-05, - "loss": 0.7114, - "step": 2348 - }, - { - "epoch": 0.42, - "grad_norm": 0.0, - "learning_rate": 1.954454605224955e-05, - "loss": 0.7485, - "step": 2349 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9543950780463853e-05, - "loss": 0.7743, - "step": 2350 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9543355129003447e-05, - "loss": 0.8845, - "step": 2351 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.954275909789202e-05, - "loss": 0.7469, - "step": 2352 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9542162687153293e-05, - "loss": 0.6833, - "step": 2353 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9541565896810987e-05, - "loss": 0.809, - "step": 2354 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.954096872688884e-05, - "loss": 0.9252, - "step": 2355 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9540371177410615e-05, - "loss": 0.7456, - "step": 2356 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953977324840008e-05, - "loss": 0.8657, - "step": 2357 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9539174939881026e-05, - "loss": 0.6398, - "step": 2358 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953857625187725e-05, - "loss": 0.8667, - "step": 2359 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953797718441257e-05, - "loss": 0.8002, - "step": 2360 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9537377737510815e-05, - "loss": 0.8513, - "step": 2361 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953677791119584e-05, - "loss": 0.7801, - "step": 2362 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.95361777054915e-05, - "loss": 0.8831, - "step": 2363 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9535577120421675e-05, - "loss": 0.7576, - "step": 2364 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9534976156010255e-05, - "loss": 0.8848, - "step": 2365 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953437481228115e-05, - "loss": 0.7895, - "step": 2366 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9533773089258284e-05, - "loss": 0.9371, - "step": 2367 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953317098696559e-05, - "loss": 0.8407, - "step": 2368 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9532568505427022e-05, - "loss": 0.8796, - "step": 2369 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953196564466655e-05, - "loss": 0.6834, - "step": 2370 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9531362404708155e-05, - "loss": 0.9539, - "step": 2371 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953075878557584e-05, - "loss": 0.9579, - "step": 2372 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.953015478729361e-05, - "loss": 0.9017, - "step": 2373 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9529550409885494e-05, - "loss": 0.9422, - "step": 2374 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952894565337554e-05, - "loss": 0.8326, - "step": 2375 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9528340517787802e-05, - "loss": 0.965, - "step": 2376 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9527735003146357e-05, - "loss": 0.7904, - "step": 2377 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9527129109475292e-05, - "loss": 0.8645, - "step": 2378 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952652283679871e-05, - "loss": 0.7585, - "step": 2379 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952591618514073e-05, - "loss": 0.8927, - "step": 2380 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9525309154525483e-05, - "loss": 0.8377, - "step": 2381 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9524701744977124e-05, - "loss": 0.8378, - "step": 2382 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952409395651981e-05, - "loss": 0.7816, - "step": 2383 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9523485789177723e-05, - "loss": 0.9324, - "step": 2384 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952287724297506e-05, - "loss": 0.9472, - "step": 2385 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9522268317936023e-05, - "loss": 0.7784, - "step": 2386 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9521659014084838e-05, - "loss": 0.7871, - "step": 2387 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.952104933144575e-05, - "loss": 0.7248, - "step": 2388 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9520439270043005e-05, - "loss": 0.9218, - "step": 2389 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9519828829900878e-05, - "loss": 0.7483, - "step": 2390 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9519218011043653e-05, - "loss": 0.9618, - "step": 2391 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951860681349563e-05, - "loss": 0.8088, - "step": 2392 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951799523728112e-05, - "loss": 0.8928, - "step": 2393 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951738328242445e-05, - "loss": 0.8807, - "step": 2394 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9516770948949975e-05, - "loss": 0.7112, - "step": 2395 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9516158236882047e-05, - "loss": 0.8176, - "step": 2396 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9515545146245042e-05, - "loss": 1.0085, - "step": 2397 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951493167706335e-05, - "loss": 0.9203, - "step": 2398 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9514317829361374e-05, - "loss": 0.7179, - "step": 2399 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9513703603163536e-05, - "loss": 0.7577, - "step": 2400 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951308899849427e-05, - "loss": 0.7331, - "step": 2401 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951247401537803e-05, - "loss": 0.8832, - "step": 2402 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.951185865383928e-05, - "loss": 0.8018, - "step": 2403 - }, - { - "epoch": 0.43, - "grad_norm": 0.0, - "learning_rate": 1.9511242913902494e-05, - "loss": 0.8541, - "step": 2404 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.951062679559217e-05, - "loss": 0.841, - "step": 2405 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9510010298932823e-05, - "loss": 0.7933, - "step": 2406 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9509393423948973e-05, - "loss": 0.888, - "step": 2407 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950877617066516e-05, - "loss": 0.8294, - "step": 2408 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9508158539105943e-05, - "loss": 0.7932, - "step": 2409 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9507540529295895e-05, - "loss": 0.9765, - "step": 2410 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9506922141259595e-05, - "loss": 0.8676, - "step": 2411 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9506303375021643e-05, - "loss": 0.8695, - "step": 2412 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950568423060666e-05, - "loss": 0.8629, - "step": 2413 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9505064708039274e-05, - "loss": 0.8721, - "step": 2414 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950444480734413e-05, - "loss": 0.7025, - "step": 2415 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950382452854589e-05, - "loss": 0.7841, - "step": 2416 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950320387166923e-05, - "loss": 0.8851, - "step": 2417 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950258283673884e-05, - "loss": 0.9041, - "step": 2418 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9501961423779424e-05, - "loss": 0.8081, - "step": 2419 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.950133963281571e-05, - "loss": 0.8237, - "step": 2420 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9500717463872424e-05, - "loss": 0.8288, - "step": 2421 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9500094916974318e-05, - "loss": 0.8974, - "step": 2422 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9499471992146167e-05, - "loss": 0.7838, - "step": 2423 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.949884868941275e-05, - "loss": 0.879, - "step": 2424 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9498225008798854e-05, - "loss": 0.7961, - "step": 2425 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9497600950329297e-05, - "loss": 0.7621, - "step": 2426 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9496976514028904e-05, - "loss": 0.8709, - "step": 2427 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.949635169992252e-05, - "loss": 0.8124, - "step": 2428 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.949572650803499e-05, - "loss": 0.8572, - "step": 2429 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9495100938391196e-05, - "loss": 0.8013, - "step": 2430 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9494474991016022e-05, - "loss": 0.8283, - "step": 2431 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9493848665934367e-05, - "loss": 0.9147, - "step": 2432 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9493221963171148e-05, - "loss": 0.846, - "step": 2433 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9492594882751294e-05, - "loss": 0.7792, - "step": 2434 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9491967424699757e-05, - "loss": 0.816, - "step": 2435 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9491339589041495e-05, - "loss": 0.674, - "step": 2436 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9490711375801484e-05, - "loss": 0.8071, - "step": 2437 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9490082785004713e-05, - "loss": 0.6611, - "step": 2438 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9489453816676196e-05, - "loss": 0.7932, - "step": 2439 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948882447084095e-05, - "loss": 0.8891, - "step": 2440 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948819474752401e-05, - "loss": 0.7621, - "step": 2441 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9487564646750426e-05, - "loss": 0.6895, - "step": 2442 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948693416854527e-05, - "loss": 0.9244, - "step": 2443 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948630331293362e-05, - "loss": 0.768, - "step": 2444 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9485672079940573e-05, - "loss": 0.7897, - "step": 2445 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948504046959124e-05, - "loss": 0.8583, - "step": 2446 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948440848191075e-05, - "loss": 0.6547, - "step": 2447 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9483776116924244e-05, - "loss": 0.9057, - "step": 2448 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9483143374656876e-05, - "loss": 0.792, - "step": 2449 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948251025513382e-05, - "loss": 0.8432, - "step": 2450 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.948187675838026e-05, - "loss": 0.8781, - "step": 2451 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9481242884421402e-05, - "loss": 0.8988, - "step": 2452 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9480608633282455e-05, - "loss": 0.6337, - "step": 2453 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9479974004988657e-05, - "loss": 0.8048, - "step": 2454 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9479338999565257e-05, - "loss": 0.7566, - "step": 2455 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.947870361703751e-05, - "loss": 0.9136, - "step": 2456 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9478067857430692e-05, - "loss": 0.8924, - "step": 2457 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9477431720770102e-05, - "loss": 0.921, - "step": 2458 - }, - { - "epoch": 0.44, - "grad_norm": 0.0, - "learning_rate": 1.9476795207081042e-05, - "loss": 0.8699, - "step": 2459 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9476158316388832e-05, - "loss": 0.8224, - "step": 2460 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9475521048718812e-05, - "loss": 0.7918, - "step": 2461 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.947488340409633e-05, - "loss": 0.8114, - "step": 2462 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9474245382546758e-05, - "loss": 0.7118, - "step": 2463 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9473606984095474e-05, - "loss": 0.9482, - "step": 2464 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9472968208767872e-05, - "loss": 0.8083, - "step": 2465 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.947232905658937e-05, - "loss": 0.7438, - "step": 2466 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.947168952758539e-05, - "loss": 0.9599, - "step": 2467 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9471049621781372e-05, - "loss": 0.8366, - "step": 2468 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.947040933920278e-05, - "loss": 0.8457, - "step": 2469 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9469768679875078e-05, - "loss": 0.7548, - "step": 2470 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9469127643823757e-05, - "loss": 0.9164, - "step": 2471 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.946848623107431e-05, - "loss": 0.7483, - "step": 2472 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.946784444165227e-05, - "loss": 0.8698, - "step": 2473 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9467202275583153e-05, - "loss": 0.8616, - "step": 2474 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9466559732892515e-05, - "loss": 0.8846, - "step": 2475 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.946591681360591e-05, - "loss": 0.9474, - "step": 2476 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.946527351774892e-05, - "loss": 0.8294, - "step": 2477 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9464629845347134e-05, - "loss": 0.9781, - "step": 2478 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9463985796426162e-05, - "loss": 0.8603, - "step": 2479 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9463341371011618e-05, - "loss": 0.9505, - "step": 2480 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9462696569129143e-05, - "loss": 0.7943, - "step": 2481 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.946205139080439e-05, - "loss": 0.7983, - "step": 2482 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9461405836063024e-05, - "loss": 0.7742, - "step": 2483 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9460759904930722e-05, - "loss": 0.8172, - "step": 2484 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9460113597433187e-05, - "loss": 0.8831, - "step": 2485 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9459466913596126e-05, - "loss": 0.8136, - "step": 2486 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9458819853445264e-05, - "loss": 0.7492, - "step": 2487 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9458172417006347e-05, - "loss": 0.8371, - "step": 2488 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.945752460430513e-05, - "loss": 0.7904, - "step": 2489 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.945687641536738e-05, - "loss": 0.94, - "step": 2490 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9456227850218888e-05, - "loss": 0.6832, - "step": 2491 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9455578908885448e-05, - "loss": 0.7884, - "step": 2492 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9454929591392883e-05, - "loss": 0.8324, - "step": 2493 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9454279897767026e-05, - "loss": 0.863, - "step": 2494 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9453629828033713e-05, - "loss": 0.7269, - "step": 2495 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9452979382218814e-05, - "loss": 0.7665, - "step": 2496 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.94523285603482e-05, - "loss": 0.8032, - "step": 2497 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9451677362447762e-05, - "loss": 0.9522, - "step": 2498 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9451025788543404e-05, - "loss": 0.7684, - "step": 2499 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9450373838661055e-05, - "loss": 0.9257, - "step": 2500 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9449721512826644e-05, - "loss": 0.7362, - "step": 2501 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.944906881106612e-05, - "loss": 0.9079, - "step": 2502 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9448415733405455e-05, - "loss": 0.9668, - "step": 2503 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.944776227987063e-05, - "loss": 0.8286, - "step": 2504 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.944710845048763e-05, - "loss": 0.7976, - "step": 2505 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9446454245282474e-05, - "loss": 0.8094, - "step": 2506 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9445799664281184e-05, - "loss": 0.9436, - "step": 2507 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9445144707509804e-05, - "loss": 0.8126, - "step": 2508 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9444489374994388e-05, - "loss": 0.8632, - "step": 2509 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9443833666761e-05, - "loss": 0.844, - "step": 2510 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.944317758283574e-05, - "loss": 0.8026, - "step": 2511 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.944252112324469e-05, - "loss": 0.8154, - "step": 2512 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9441864288013973e-05, - "loss": 0.7955, - "step": 2513 - }, - { - "epoch": 0.45, - "grad_norm": 0.0, - "learning_rate": 1.9441207077169725e-05, - "loss": 0.7842, - "step": 2514 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9440549490738084e-05, - "loss": 0.7468, - "step": 2515 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9439891528745215e-05, - "loss": 0.8505, - "step": 2516 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9439233191217283e-05, - "loss": 0.7918, - "step": 2517 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943857447818049e-05, - "loss": 0.8914, - "step": 2518 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943791538966103e-05, - "loss": 0.8139, - "step": 2519 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943725592568513e-05, - "loss": 0.8275, - "step": 2520 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943659608627903e-05, - "loss": 0.7505, - "step": 2521 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943593587146896e-05, - "loss": 0.9976, - "step": 2522 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9435275281281202e-05, - "loss": 0.752, - "step": 2523 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9434614315742028e-05, - "loss": 0.9343, - "step": 2524 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9433952974877733e-05, - "loss": 0.6818, - "step": 2525 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9433291258714634e-05, - "loss": 0.9002, - "step": 2526 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9432629167279043e-05, - "loss": 0.677, - "step": 2527 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9431966700597305e-05, - "loss": 0.7781, - "step": 2528 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9431303858695777e-05, - "loss": 0.8353, - "step": 2529 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.943064064160082e-05, - "loss": 0.7052, - "step": 2530 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9429977049338825e-05, - "loss": 0.8859, - "step": 2531 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.942931308193619e-05, - "loss": 0.7706, - "step": 2532 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9428648739419326e-05, - "loss": 0.8352, - "step": 2533 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9427984021814663e-05, - "loss": 0.8594, - "step": 2534 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9427318929148647e-05, - "loss": 0.9811, - "step": 2535 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.942665346144773e-05, - "loss": 0.7408, - "step": 2536 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9425987618738392e-05, - "loss": 0.8117, - "step": 2537 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9425321401047118e-05, - "loss": 0.8097, - "step": 2538 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9424654808400413e-05, - "loss": 0.8163, - "step": 2539 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9423987840824792e-05, - "loss": 0.9173, - "step": 2540 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9423320498346792e-05, - "loss": 0.8869, - "step": 2541 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.942265278099296e-05, - "loss": 0.7738, - "step": 2542 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.942198468878986e-05, - "loss": 0.8066, - "step": 2543 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9421316221764065e-05, - "loss": 0.7128, - "step": 2544 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9420647379942172e-05, - "loss": 0.883, - "step": 2545 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941997816335079e-05, - "loss": 0.8549, - "step": 2546 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941930857201654e-05, - "loss": 0.8903, - "step": 2547 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9418638605966054e-05, - "loss": 0.9804, - "step": 2548 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941796826522599e-05, - "loss": 0.7307, - "step": 2549 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9417297549823018e-05, - "loss": 0.9078, - "step": 2550 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9416626459783816e-05, - "loss": 0.8059, - "step": 2551 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941595499513508e-05, - "loss": 0.8872, - "step": 2552 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9415283155903526e-05, - "loss": 0.7631, - "step": 2553 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9414610942115878e-05, - "loss": 0.7956, - "step": 2554 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9413938353798877e-05, - "loss": 0.8908, - "step": 2555 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9413265390979282e-05, - "loss": 0.7251, - "step": 2556 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9412592053683867e-05, - "loss": 0.7731, - "step": 2557 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941191834193941e-05, - "loss": 0.8492, - "step": 2558 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.941124425577272e-05, - "loss": 0.7749, - "step": 2559 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9410569795210613e-05, - "loss": 0.8519, - "step": 2560 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.940989496027992e-05, - "loss": 0.8412, - "step": 2561 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.940921975100748e-05, - "loss": 0.858, - "step": 2562 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.940854416742016e-05, - "loss": 0.8684, - "step": 2563 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.940786820954484e-05, - "loss": 0.7951, - "step": 2564 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9407191877408404e-05, - "loss": 0.7465, - "step": 2565 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9406515171037757e-05, - "loss": 0.8507, - "step": 2566 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9405838090459826e-05, - "loss": 0.8597, - "step": 2567 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.940516063570154e-05, - "loss": 0.863, - "step": 2568 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9404482806789854e-05, - "loss": 0.736, - "step": 2569 - }, - { - "epoch": 0.46, - "grad_norm": 0.0, - "learning_rate": 1.9403804603751732e-05, - "loss": 0.9969, - "step": 2570 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.940312602661415e-05, - "loss": 0.8506, - "step": 2571 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9402447075404107e-05, - "loss": 0.8069, - "step": 2572 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9401767750148615e-05, - "loss": 0.7121, - "step": 2573 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9401088050874693e-05, - "loss": 0.8627, - "step": 2574 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9400407977609382e-05, - "loss": 0.8694, - "step": 2575 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939972753037974e-05, - "loss": 0.9711, - "step": 2576 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9399046709212834e-05, - "loss": 0.9036, - "step": 2577 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939836551413575e-05, - "loss": 0.765, - "step": 2578 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9397683945175584e-05, - "loss": 0.7304, - "step": 2579 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939700200235945e-05, - "loss": 0.7902, - "step": 2580 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939631968571448e-05, - "loss": 0.9698, - "step": 2581 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9395636995267816e-05, - "loss": 0.7829, - "step": 2582 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939495393104662e-05, - "loss": 0.9126, - "step": 2583 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9394270493078058e-05, - "loss": 0.8136, - "step": 2584 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9393586681389324e-05, - "loss": 0.9926, - "step": 2585 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9392902496007617e-05, - "loss": 0.7266, - "step": 2586 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9392217936960162e-05, - "loss": 0.8215, - "step": 2587 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9391533004274185e-05, - "loss": 0.6757, - "step": 2588 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9390847697976937e-05, - "loss": 0.9507, - "step": 2589 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.939016201809568e-05, - "loss": 0.8452, - "step": 2590 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938947596465769e-05, - "loss": 0.7076, - "step": 2591 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9388789537690263e-05, - "loss": 0.8688, - "step": 2592 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9388102737220706e-05, - "loss": 0.8302, - "step": 2593 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938741556327634e-05, - "loss": 0.7095, - "step": 2594 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9386728015884495e-05, - "loss": 0.7597, - "step": 2595 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9386040095072533e-05, - "loss": 0.7579, - "step": 2596 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938535180086782e-05, - "loss": 0.841, - "step": 2597 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938466313329773e-05, - "loss": 0.848, - "step": 2598 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9383974092389666e-05, - "loss": 0.872, - "step": 2599 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9383284678171035e-05, - "loss": 0.8401, - "step": 2600 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9382594890669266e-05, - "loss": 0.76, - "step": 2601 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9381904729911797e-05, - "loss": 0.6372, - "step": 2602 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938121419592609e-05, - "loss": 0.807, - "step": 2603 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.938052328873961e-05, - "loss": 0.8151, - "step": 2604 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937983200837984e-05, - "loss": 0.8714, - "step": 2605 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9379140354874287e-05, - "loss": 0.7823, - "step": 2606 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9378448328250467e-05, - "loss": 0.9788, - "step": 2607 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.93777559285359e-05, - "loss": 0.7652, - "step": 2608 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937706315575814e-05, - "loss": 0.8836, - "step": 2609 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9376370009944747e-05, - "loss": 0.8479, - "step": 2610 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937567649112329e-05, - "loss": 0.7424, - "step": 2611 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9374982599321358e-05, - "loss": 0.8216, - "step": 2612 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937428833456656e-05, - "loss": 0.8135, - "step": 2613 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9373593696886516e-05, - "loss": 0.8166, - "step": 2614 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9372898686308855e-05, - "loss": 0.8214, - "step": 2615 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9372203302861227e-05, - "loss": 0.7623, - "step": 2616 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9371507546571294e-05, - "loss": 0.8768, - "step": 2617 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937081141746674e-05, - "loss": 0.7721, - "step": 2618 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.937011491557525e-05, - "loss": 0.6882, - "step": 2619 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9369418040924543e-05, - "loss": 0.7065, - "step": 2620 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.936872079354233e-05, - "loss": 0.8526, - "step": 2621 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9368023173456357e-05, - "loss": 0.8615, - "step": 2622 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9367325180694373e-05, - "loss": 0.7803, - "step": 2623 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.9366626815284146e-05, - "loss": 0.9197, - "step": 2624 - }, - { - "epoch": 0.47, - "grad_norm": 0.0, - "learning_rate": 1.936592807725346e-05, - "loss": 0.7416, - "step": 2625 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.936522896663011e-05, - "loss": 0.9625, - "step": 2626 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9364529483441907e-05, - "loss": 0.8676, - "step": 2627 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9363829627716678e-05, - "loss": 0.881, - "step": 2628 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9363129399482266e-05, - "loss": 0.7952, - "step": 2629 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.936242879876653e-05, - "loss": 0.9307, - "step": 2630 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9361727825597334e-05, - "loss": 0.7803, - "step": 2631 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9361026480002567e-05, - "loss": 0.9492, - "step": 2632 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9360324762010135e-05, - "loss": 0.7409, - "step": 2633 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9359622671647945e-05, - "loss": 0.8263, - "step": 2634 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.935892020894393e-05, - "loss": 0.8553, - "step": 2635 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.935821737392604e-05, - "loss": 0.6817, - "step": 2636 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.935751416662223e-05, - "loss": 0.7069, - "step": 2637 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9356810587060475e-05, - "loss": 0.9058, - "step": 2638 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9356106635268767e-05, - "loss": 0.6489, - "step": 2639 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9355402311275108e-05, - "loss": 0.7742, - "step": 2640 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9354697615107518e-05, - "loss": 0.7387, - "step": 2641 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9353992546794034e-05, - "loss": 0.777, - "step": 2642 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.93532871063627e-05, - "loss": 0.6944, - "step": 2643 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.935258129384158e-05, - "loss": 0.7969, - "step": 2644 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.935187510925876e-05, - "loss": 0.6416, - "step": 2645 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9351168552642324e-05, - "loss": 0.764, - "step": 2646 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9350461624020384e-05, - "loss": 0.8273, - "step": 2647 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.934975432342106e-05, - "loss": 0.6885, - "step": 2648 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9349046650872493e-05, - "loss": 0.7811, - "step": 2649 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9348338606402837e-05, - "loss": 0.8194, - "step": 2650 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9347630190040254e-05, - "loss": 0.7647, - "step": 2651 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.934692140181293e-05, - "loss": 0.9247, - "step": 2652 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9346212241749062e-05, - "loss": 0.8794, - "step": 2653 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9345502709876858e-05, - "loss": 0.8221, - "step": 2654 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9344792806224546e-05, - "loss": 0.9517, - "step": 2655 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9344082530820367e-05, - "loss": 0.8892, - "step": 2656 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.934337188369258e-05, - "loss": 0.797, - "step": 2657 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9342660864869453e-05, - "loss": 0.8864, - "step": 2658 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.934194947437927e-05, - "loss": 0.7868, - "step": 2659 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9341237712250336e-05, - "loss": 0.8063, - "step": 2660 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9340525578510964e-05, - "loss": 0.8249, - "step": 2661 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.933981307318948e-05, - "loss": 0.8063, - "step": 2662 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9339100196314233e-05, - "loss": 0.7852, - "step": 2663 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.933838694791358e-05, - "loss": 0.844, - "step": 2664 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.93376733280159e-05, - "loss": 1.0046, - "step": 2665 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9336959336649578e-05, - "loss": 0.7225, - "step": 2666 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9336244973843015e-05, - "loss": 0.7873, - "step": 2667 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.933553023962464e-05, - "loss": 0.9669, - "step": 2668 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9334815134022873e-05, - "loss": 0.9053, - "step": 2669 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.933409965706617e-05, - "loss": 0.8506, - "step": 2670 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9333383808782993e-05, - "loss": 0.9476, - "step": 2671 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.933266758920182e-05, - "loss": 0.7818, - "step": 2672 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9331950998351142e-05, - "loss": 0.8521, - "step": 2673 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9331234036259466e-05, - "loss": 0.8461, - "step": 2674 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9330516702955314e-05, - "loss": 0.902, - "step": 2675 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9329798998467226e-05, - "loss": 0.8737, - "step": 2676 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9329080922823747e-05, - "loss": 0.8125, - "step": 2677 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9328362476053454e-05, - "loss": 0.8945, - "step": 2678 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9327643658184917e-05, - "loss": 0.8375, - "step": 2679 - }, - { - "epoch": 0.48, - "grad_norm": 0.0, - "learning_rate": 1.9326924469246734e-05, - "loss": 0.7932, - "step": 2680 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.932620490926752e-05, - "loss": 0.9634, - "step": 2681 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9325484978275897e-05, - "loss": 0.8831, - "step": 2682 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.932476467630051e-05, - "loss": 0.8064, - "step": 2683 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9324044003370006e-05, - "loss": 0.9054, - "step": 2684 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.932332295951306e-05, - "loss": 0.8758, - "step": 2685 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9322601544758357e-05, - "loss": 0.7928, - "step": 2686 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9321879759134588e-05, - "loss": 0.7415, - "step": 2687 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.932115760267048e-05, - "loss": 0.8097, - "step": 2688 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9320435075394753e-05, - "loss": 0.8952, - "step": 2689 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9319712177336146e-05, - "loss": 0.8031, - "step": 2690 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.931898890852343e-05, - "loss": 0.7395, - "step": 2691 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9318265268985367e-05, - "loss": 0.7852, - "step": 2692 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9317541258750752e-05, - "loss": 0.8253, - "step": 2693 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.931681687784838e-05, - "loss": 0.8085, - "step": 2694 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9316092126307074e-05, - "loss": 0.9368, - "step": 2695 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9315367004155663e-05, - "loss": 0.7009, - "step": 2696 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9314641511422995e-05, - "loss": 0.8483, - "step": 2697 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9313915648137928e-05, - "loss": 0.7886, - "step": 2698 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9313189414329344e-05, - "loss": 0.7645, - "step": 2699 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.931246281002613e-05, - "loss": 0.9025, - "step": 2700 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9311735835257196e-05, - "loss": 0.9145, - "step": 2701 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9311008490051454e-05, - "loss": 0.9176, - "step": 2702 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9310280774437845e-05, - "loss": 0.8097, - "step": 2703 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9309552688445317e-05, - "loss": 0.853, - "step": 2704 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9308824232102833e-05, - "loss": 0.8163, - "step": 2705 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9308095405439376e-05, - "loss": 0.8516, - "step": 2706 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930736620848394e-05, - "loss": 0.7043, - "step": 2707 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930663664126553e-05, - "loss": 0.9323, - "step": 2708 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930590670381317e-05, - "loss": 0.8143, - "step": 2709 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9305176396155904e-05, - "loss": 0.8303, - "step": 2710 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9304445718322778e-05, - "loss": 0.8598, - "step": 2711 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930371467034286e-05, - "loss": 0.7452, - "step": 2712 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9302983252245234e-05, - "loss": 0.8016, - "step": 2713 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9302251464059e-05, - "loss": 0.8249, - "step": 2714 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9301519305813267e-05, - "loss": 0.8239, - "step": 2715 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930078677753716e-05, - "loss": 0.7553, - "step": 2716 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.930005387925982e-05, - "loss": 0.7196, - "step": 2717 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.929932061101041e-05, - "loss": 0.8505, - "step": 2718 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9298586972818092e-05, - "loss": 1.0043, - "step": 2719 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9297852964712057e-05, - "loss": 0.9266, - "step": 2720 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.92971185867215e-05, - "loss": 0.842, - "step": 2721 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9296383838875638e-05, - "loss": 0.7295, - "step": 2722 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9295648721203706e-05, - "loss": 0.7236, - "step": 2723 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.929491323373494e-05, - "loss": 0.8591, - "step": 2724 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9294177376498603e-05, - "loss": 0.8375, - "step": 2725 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.929344114952397e-05, - "loss": 0.9015, - "step": 2726 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9292704552840326e-05, - "loss": 0.8302, - "step": 2727 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9291967586476977e-05, - "loss": 0.8699, - "step": 2728 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.929123025046324e-05, - "loss": 0.8212, - "step": 2729 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9290492544828443e-05, - "loss": 0.8943, - "step": 2730 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9289754469601937e-05, - "loss": 0.6948, - "step": 2731 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9289016024813086e-05, - "loss": 0.8606, - "step": 2732 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9288277210491265e-05, - "loss": 0.7542, - "step": 2733 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.9287538026665866e-05, - "loss": 0.7788, - "step": 2734 - }, - { - "epoch": 0.49, - "grad_norm": 0.0, - "learning_rate": 1.928679847336629e-05, - "loss": 0.7005, - "step": 2735 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9286058550621967e-05, - "loss": 0.9095, - "step": 2736 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9285318258462325e-05, - "loss": 0.7836, - "step": 2737 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9284577596916812e-05, - "loss": 0.656, - "step": 2738 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.92838365660149e-05, - "loss": 0.7044, - "step": 2739 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9283095165786067e-05, - "loss": 0.9641, - "step": 2740 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9282353396259802e-05, - "loss": 0.9022, - "step": 2741 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.928161125746562e-05, - "loss": 0.8497, - "step": 2742 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.928086874943304e-05, - "loss": 0.7829, - "step": 2743 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9280125872191605e-05, - "loss": 0.8067, - "step": 2744 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9279382625770863e-05, - "loss": 0.7485, - "step": 2745 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9278639010200383e-05, - "loss": 0.794, - "step": 2746 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.927789502550975e-05, - "loss": 0.9613, - "step": 2747 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9277150671728554e-05, - "loss": 0.7886, - "step": 2748 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9276405948886417e-05, - "loss": 0.9183, - "step": 2749 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.927566085701296e-05, - "loss": 0.7886, - "step": 2750 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9274915396137823e-05, - "loss": 0.8012, - "step": 2751 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9274169566290658e-05, - "loss": 0.8174, - "step": 2752 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9273423367501145e-05, - "loss": 0.8701, - "step": 2753 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9272676799798965e-05, - "loss": 0.8754, - "step": 2754 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9271929863213815e-05, - "loss": 0.9401, - "step": 2755 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.927118255777541e-05, - "loss": 0.9454, - "step": 2756 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9270434883513485e-05, - "loss": 0.8871, - "step": 2757 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9269686840457774e-05, - "loss": 0.8933, - "step": 2758 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9268938428638043e-05, - "loss": 0.8382, - "step": 2759 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9268189648084064e-05, - "loss": 0.781, - "step": 2760 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926744049882562e-05, - "loss": 0.9178, - "step": 2761 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926669098089252e-05, - "loss": 0.7064, - "step": 2762 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926594109431458e-05, - "loss": 0.7394, - "step": 2763 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9265190839121625e-05, - "loss": 0.7691, - "step": 2764 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9264440215343508e-05, - "loss": 0.7879, - "step": 2765 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926368922301009e-05, - "loss": 0.8337, - "step": 2766 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9262937862151245e-05, - "loss": 1.0468, - "step": 2767 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926218613279686e-05, - "loss": 0.8499, - "step": 2768 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9261434034976848e-05, - "loss": 0.9202, - "step": 2769 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.926068156872112e-05, - "loss": 0.8541, - "step": 2770 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9259928734059618e-05, - "loss": 0.7057, - "step": 2771 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9259175531022287e-05, - "loss": 0.8253, - "step": 2772 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9258421959639092e-05, - "loss": 0.8013, - "step": 2773 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.925766801994001e-05, - "loss": 0.9574, - "step": 2774 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9256913711955035e-05, - "loss": 0.7044, - "step": 2775 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9256159035714173e-05, - "loss": 0.8294, - "step": 2776 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9255403991247452e-05, - "loss": 0.8369, - "step": 2777 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.92546485785849e-05, - "loss": 0.8611, - "step": 2778 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.925389279775658e-05, - "loss": 0.9458, - "step": 2779 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9253136648792545e-05, - "loss": 0.6887, - "step": 2780 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9252380131722884e-05, - "loss": 0.7758, - "step": 2781 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9251623246577695e-05, - "loss": 0.843, - "step": 2782 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.925086599338708e-05, - "loss": 0.7527, - "step": 2783 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.925010837218117e-05, - "loss": 0.9203, - "step": 2784 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9249350382990103e-05, - "loss": 0.8709, - "step": 2785 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9248592025844034e-05, - "loss": 0.6548, - "step": 2786 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.924783330077313e-05, - "loss": 0.7294, - "step": 2787 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9247074207807573e-05, - "loss": 0.8043, - "step": 2788 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.924631474697756e-05, - "loss": 0.7819, - "step": 2789 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9245554918313313e-05, - "loss": 0.7966, - "step": 2790 - }, - { - "epoch": 0.5, - "grad_norm": 0.0, - "learning_rate": 1.9244794721845048e-05, - "loss": 0.8063, - "step": 2791 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9244034157603016e-05, - "loss": 0.7396, - "step": 2792 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9243273225617466e-05, - "loss": 0.9259, - "step": 2793 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9242511925918675e-05, - "loss": 0.7997, - "step": 2794 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9241750258536925e-05, - "loss": 0.8492, - "step": 2795 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9240988223502516e-05, - "loss": 0.7873, - "step": 2796 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9240225820845766e-05, - "loss": 0.8205, - "step": 2797 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9239463050597005e-05, - "loss": 0.7833, - "step": 2798 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9238699912786573e-05, - "loss": 0.7645, - "step": 2799 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9237936407444836e-05, - "loss": 0.9602, - "step": 2800 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.923717253460216e-05, - "loss": 0.9282, - "step": 2801 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9236408294288936e-05, - "loss": 0.7683, - "step": 2802 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.923564368653557e-05, - "loss": 0.7629, - "step": 2803 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9234878711372473e-05, - "loss": 0.7979, - "step": 2804 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.923411336883008e-05, - "loss": 0.835, - "step": 2805 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9233347658938842e-05, - "loss": 0.763, - "step": 2806 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.923258158172922e-05, - "loss": 0.9478, - "step": 2807 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9231815137231676e-05, - "loss": 0.9278, - "step": 2808 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9231048325476717e-05, - "loss": 0.8029, - "step": 2809 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9230281146494842e-05, - "loss": 0.937, - "step": 2810 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9229513600316568e-05, - "loss": 0.8693, - "step": 2811 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9228745686972435e-05, - "loss": 0.7035, - "step": 2812 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9227977406492992e-05, - "loss": 0.9115, - "step": 2813 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9227208758908794e-05, - "loss": 0.7949, - "step": 2814 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9226439744250425e-05, - "loss": 0.9676, - "step": 2815 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9225670362548478e-05, - "loss": 0.8944, - "step": 2816 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9224900613833558e-05, - "loss": 0.7207, - "step": 2817 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.922413049813629e-05, - "loss": 0.8526, - "step": 2818 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.922336001548731e-05, - "loss": 0.9221, - "step": 2819 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9222589165917265e-05, - "loss": 0.712, - "step": 2820 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9221817949456828e-05, - "loss": 0.8521, - "step": 2821 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9221046366136673e-05, - "loss": 0.78, - "step": 2822 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9220274415987498e-05, - "loss": 0.976, - "step": 2823 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921950209904001e-05, - "loss": 0.8474, - "step": 2824 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9218729415324935e-05, - "loss": 0.8162, - "step": 2825 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921795636487301e-05, - "loss": 0.748, - "step": 2826 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921718294771499e-05, - "loss": 0.7454, - "step": 2827 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9216409163881643e-05, - "loss": 0.8913, - "step": 2828 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921563501340375e-05, - "loss": 0.8391, - "step": 2829 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921486049631211e-05, - "loss": 0.8756, - "step": 2830 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9214085612637537e-05, - "loss": 0.7309, - "step": 2831 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9213310362410848e-05, - "loss": 0.8329, - "step": 2832 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9212534745662893e-05, - "loss": 0.7862, - "step": 2833 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9211758762424523e-05, - "loss": 0.6751, - "step": 2834 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.921098241272661e-05, - "loss": 0.7987, - "step": 2835 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9210205696600036e-05, - "loss": 0.8873, - "step": 2836 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.92094286140757e-05, - "loss": 0.8166, - "step": 2837 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.920865116518452e-05, - "loss": 0.9751, - "step": 2838 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9207873349957418e-05, - "loss": 0.8654, - "step": 2839 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9207095168425343e-05, - "loss": 0.7912, - "step": 2840 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9206316620619248e-05, - "loss": 0.885, - "step": 2841 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9205537706570106e-05, - "loss": 0.761, - "step": 2842 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.920475842630891e-05, - "loss": 0.9846, - "step": 2843 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9203978779866647e-05, - "loss": 0.7738, - "step": 2844 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9203198767274343e-05, - "loss": 0.8023, - "step": 2845 - }, - { - "epoch": 0.51, - "grad_norm": 0.0, - "learning_rate": 1.9202418388563026e-05, - "loss": 0.8479, - "step": 2846 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9201637643763743e-05, - "loss": 0.9524, - "step": 2847 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9200856532907554e-05, - "loss": 0.8546, - "step": 2848 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9200075056025525e-05, - "loss": 0.9009, - "step": 2849 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.919929321314875e-05, - "loss": 0.8502, - "step": 2850 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9198511004308334e-05, - "loss": 0.7758, - "step": 2851 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9197728429535392e-05, - "loss": 0.7311, - "step": 2852 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9196945488861053e-05, - "loss": 0.6973, - "step": 2853 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.919616218231647e-05, - "loss": 0.7917, - "step": 2854 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9195378509932803e-05, - "loss": 0.8761, - "step": 2855 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9194594471741225e-05, - "loss": 0.8323, - "step": 2856 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9193810067772925e-05, - "loss": 0.9148, - "step": 2857 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9193025298059113e-05, - "loss": 0.8192, - "step": 2858 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.919224016263101e-05, - "loss": 0.9112, - "step": 2859 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.919145466151984e-05, - "loss": 0.742, - "step": 2860 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.919066879475686e-05, - "loss": 0.707, - "step": 2861 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.918988256237333e-05, - "loss": 0.7948, - "step": 2862 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.918909596440053e-05, - "loss": 0.6953, - "step": 2863 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9188309000869752e-05, - "loss": 0.8757, - "step": 2864 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.91875216718123e-05, - "loss": 0.7866, - "step": 2865 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9186733977259497e-05, - "loss": 0.9571, - "step": 2866 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9185945917242677e-05, - "loss": 0.7122, - "step": 2867 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9185157491793194e-05, - "loss": 0.657, - "step": 2868 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.918436870094241e-05, - "loss": 0.8471, - "step": 2869 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9183579544721708e-05, - "loss": 0.6927, - "step": 2870 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9182790023162473e-05, - "loss": 0.9009, - "step": 2871 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9182000136296123e-05, - "loss": 0.8888, - "step": 2872 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9181209884154078e-05, - "loss": 0.9199, - "step": 2873 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9180419266767777e-05, - "loss": 0.8098, - "step": 2874 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.917962828416867e-05, - "loss": 0.7207, - "step": 2875 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9178836936388227e-05, - "loss": 0.8821, - "step": 2876 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.917804522345792e-05, - "loss": 0.6222, - "step": 2877 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9177253145409257e-05, - "loss": 0.9511, - "step": 2878 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.917646070227374e-05, - "loss": 0.765, - "step": 2879 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9175667894082896e-05, - "loss": 0.8422, - "step": 2880 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9174874720868268e-05, - "loss": 0.7914, - "step": 2881 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.91740811826614e-05, - "loss": 0.9092, - "step": 2882 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.917328727949387e-05, - "loss": 0.9017, - "step": 2883 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9172493011397256e-05, - "loss": 0.7511, - "step": 2884 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9171698378403156e-05, - "loss": 0.9262, - "step": 2885 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9170903380543183e-05, - "loss": 0.8083, - "step": 2886 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9170108017848966e-05, - "loss": 0.8423, - "step": 2887 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.916931229035214e-05, - "loss": 0.741, - "step": 2888 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9168516198084365e-05, - "loss": 0.9414, - "step": 2889 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9167719741077306e-05, - "loss": 0.811, - "step": 2890 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9166922919362654e-05, - "loss": 0.7386, - "step": 2891 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.91661257329721e-05, - "loss": 0.8624, - "step": 2892 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9165328181937365e-05, - "loss": 0.8825, - "step": 2893 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9164530266290172e-05, - "loss": 0.7823, - "step": 2894 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9163731986062268e-05, - "loss": 0.7082, - "step": 2895 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9162933341285407e-05, - "loss": 0.8689, - "step": 2896 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.916213433199136e-05, - "loss": 0.8537, - "step": 2897 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9161334958211912e-05, - "loss": 0.7279, - "step": 2898 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9160535219978867e-05, - "loss": 0.9139, - "step": 2899 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.915973511732404e-05, - "loss": 0.7202, - "step": 2900 - }, - { - "epoch": 0.52, - "grad_norm": 0.0, - "learning_rate": 1.9158934650279252e-05, - "loss": 0.8934, - "step": 2901 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9158133818876358e-05, - "loss": 0.8468, - "step": 2902 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9157332623147212e-05, - "loss": 0.7725, - "step": 2903 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9156531063123687e-05, - "loss": 0.8209, - "step": 2904 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.915572913883767e-05, - "loss": 0.846, - "step": 2905 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9154926850321064e-05, - "loss": 0.8121, - "step": 2906 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9154124197605782e-05, - "loss": 0.9474, - "step": 2907 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.915332118072376e-05, - "loss": 0.7245, - "step": 2908 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.915251779970694e-05, - "loss": 0.7663, - "step": 2909 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9151714054587282e-05, - "loss": 0.8262, - "step": 2910 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9150909945396764e-05, - "loss": 0.899, - "step": 2911 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9150105472167368e-05, - "loss": 0.9078, - "step": 2912 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9149300634931106e-05, - "loss": 0.8064, - "step": 2913 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9148495433719986e-05, - "loss": 0.8423, - "step": 2914 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9147689868566046e-05, - "loss": 0.8413, - "step": 2915 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9146883939501337e-05, - "loss": 0.9015, - "step": 2916 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.914607764655791e-05, - "loss": 1.0214, - "step": 2917 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.914527098976785e-05, - "loss": 0.8618, - "step": 2918 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.914446396916324e-05, - "loss": 0.8804, - "step": 2919 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9143656584776192e-05, - "loss": 0.8538, - "step": 2920 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.914284883663882e-05, - "loss": 0.8262, - "step": 2921 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9142040724783254e-05, - "loss": 0.9577, - "step": 2922 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9141232249241654e-05, - "loss": 1.0041, - "step": 2923 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.914042341004617e-05, - "loss": 0.8698, - "step": 2924 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9139614207228986e-05, - "loss": 0.8081, - "step": 2925 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.913880464082229e-05, - "loss": 0.6925, - "step": 2926 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9137994710858292e-05, - "loss": 0.7329, - "step": 2927 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9137184417369213e-05, - "loss": 0.905, - "step": 2928 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.913637376038728e-05, - "loss": 0.8799, - "step": 2929 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.913556273994475e-05, - "loss": 0.7654, - "step": 2930 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9134751356073882e-05, - "loss": 0.8206, - "step": 2931 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9133939608806957e-05, - "loss": 0.738, - "step": 2932 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9133127498176267e-05, - "loss": 0.8012, - "step": 2933 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.913231502421412e-05, - "loss": 0.8173, - "step": 2934 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9131502186952836e-05, - "loss": 0.9494, - "step": 2935 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.913068898642475e-05, - "loss": 0.8448, - "step": 2936 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.912987542266222e-05, - "loss": 0.8422, - "step": 2937 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9129061495697602e-05, - "loss": 0.7835, - "step": 2938 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.912824720556328e-05, - "loss": 0.8063, - "step": 2939 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9127432552291645e-05, - "loss": 0.8108, - "step": 2940 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9126617535915107e-05, - "loss": 0.893, - "step": 2941 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9125802156466092e-05, - "loss": 0.8612, - "step": 2942 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.912498641397703e-05, - "loss": 0.7844, - "step": 2943 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9124170308480377e-05, - "loss": 0.8034, - "step": 2944 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.91233538400086e-05, - "loss": 0.7257, - "step": 2945 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9122537008594176e-05, - "loss": 0.9376, - "step": 2946 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9121719814269603e-05, - "loss": 0.8498, - "step": 2947 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.912090225706739e-05, - "loss": 0.7903, - "step": 2948 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.912008433702006e-05, - "loss": 0.7241, - "step": 2949 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.911926605416015e-05, - "loss": 0.778, - "step": 2950 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9118447408520214e-05, - "loss": 0.7502, - "step": 2951 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9117628400132823e-05, - "loss": 0.8094, - "step": 2952 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.911680902903055e-05, - "loss": 0.8386, - "step": 2953 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9115989295246003e-05, - "loss": 0.8584, - "step": 2954 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9115169198811777e-05, - "loss": 0.8059, - "step": 2955 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.911434873976051e-05, - "loss": 0.7473, - "step": 2956 - }, - { - "epoch": 0.53, - "grad_norm": 0.0, - "learning_rate": 1.9113527918124836e-05, - "loss": 0.899, - "step": 2957 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.911270673393741e-05, - "loss": 0.9882, - "step": 2958 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9111885187230895e-05, - "loss": 0.8904, - "step": 2959 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.911106327803798e-05, - "loss": 0.905, - "step": 2960 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9110241006391363e-05, - "loss": 0.7834, - "step": 2961 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.910941837232375e-05, - "loss": 0.8252, - "step": 2962 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9108595375867867e-05, - "loss": 0.7225, - "step": 2963 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9107772017056455e-05, - "loss": 0.8865, - "step": 2964 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9106948295922273e-05, - "loss": 0.9124, - "step": 2965 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9106124212498085e-05, - "loss": 0.8194, - "step": 2966 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9105299766816676e-05, - "loss": 0.8056, - "step": 2967 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9104474958910847e-05, - "loss": 0.964, - "step": 2968 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9103649788813407e-05, - "loss": 0.8457, - "step": 2969 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9102824256557178e-05, - "loss": 0.8694, - "step": 2970 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.910199836217501e-05, - "loss": 0.8178, - "step": 2971 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9101172105699754e-05, - "loss": 0.8396, - "step": 2972 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9100345487164278e-05, - "loss": 0.8664, - "step": 2973 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.909951850660147e-05, - "loss": 0.8305, - "step": 2974 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.909869116404423e-05, - "loss": 0.9316, - "step": 2975 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9097863459525468e-05, - "loss": 0.8459, - "step": 2976 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.909703539307811e-05, - "loss": 0.8044, - "step": 2977 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.90962069647351e-05, - "loss": 0.6656, - "step": 2978 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9095378174529394e-05, - "loss": 0.8848, - "step": 2979 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9094549022493965e-05, - "loss": 0.7259, - "step": 2980 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9093719508661796e-05, - "loss": 0.8782, - "step": 2981 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9092889633065884e-05, - "loss": 0.7189, - "step": 2982 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.909205939573925e-05, - "loss": 0.8227, - "step": 2983 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9091228796714912e-05, - "loss": 0.8252, - "step": 2984 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.909039783602592e-05, - "loss": 0.7557, - "step": 2985 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9089566513705335e-05, - "loss": 0.8334, - "step": 2986 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9088734829786217e-05, - "loss": 0.912, - "step": 2987 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908790278430166e-05, - "loss": 0.7753, - "step": 2988 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9087070377284758e-05, - "loss": 0.9143, - "step": 2989 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9086237608768635e-05, - "loss": 0.8828, - "step": 2990 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908540447878641e-05, - "loss": 0.7951, - "step": 2991 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908457098737123e-05, - "loss": 0.7671, - "step": 2992 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908373713455626e-05, - "loss": 0.8055, - "step": 2993 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908290292037466e-05, - "loss": 0.788, - "step": 2994 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9082068344859623e-05, - "loss": 0.8623, - "step": 2995 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9081233408044346e-05, - "loss": 0.8404, - "step": 2996 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.908039810996205e-05, - "loss": 0.8702, - "step": 2997 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9079562450645962e-05, - "loss": 0.7979, - "step": 2998 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9078726430129323e-05, - "loss": 0.8003, - "step": 2999 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9077890048445397e-05, - "loss": 0.6706, - "step": 3000 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9077053305627453e-05, - "loss": 0.8679, - "step": 3001 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9076216201708776e-05, - "loss": 0.679, - "step": 3002 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9075378736722674e-05, - "loss": 0.8124, - "step": 3003 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9074540910702457e-05, - "loss": 0.9297, - "step": 3004 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.907370272368146e-05, - "loss": 0.7812, - "step": 3005 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9072864175693017e-05, - "loss": 0.8547, - "step": 3006 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.90720252667705e-05, - "loss": 0.7919, - "step": 3007 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9071185996947276e-05, - "loss": 0.8163, - "step": 3008 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.907034636625673e-05, - "loss": 0.8269, - "step": 3009 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.9069506374732272e-05, - "loss": 0.7657, - "step": 3010 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.906866602240731e-05, - "loss": 0.8233, - "step": 3011 - }, - { - "epoch": 0.54, - "grad_norm": 0.0, - "learning_rate": 1.906782530931528e-05, - "loss": 0.8647, - "step": 3012 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.906698423548963e-05, - "loss": 0.7611, - "step": 3013 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9066142800963806e-05, - "loss": 0.8566, - "step": 3014 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9065301005771294e-05, - "loss": 0.7488, - "step": 3015 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9064458849945574e-05, - "loss": 0.8395, - "step": 3016 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.906361633352016e-05, - "loss": 0.9102, - "step": 3017 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9062773456528555e-05, - "loss": 0.8412, - "step": 3018 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9061930219004302e-05, - "loss": 0.7025, - "step": 3019 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.906108662098094e-05, - "loss": 0.8452, - "step": 3020 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9060242662492026e-05, - "loss": 0.7673, - "step": 3021 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.905939834357114e-05, - "loss": 1.0717, - "step": 3022 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9058553664251866e-05, - "loss": 0.9446, - "step": 3023 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.905770862456781e-05, - "loss": 0.9445, - "step": 3024 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9056863224552594e-05, - "loss": 0.7398, - "step": 3025 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9056017464239834e-05, - "loss": 0.9276, - "step": 3026 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9055171343663192e-05, - "loss": 0.778, - "step": 3027 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9054324862856323e-05, - "loss": 0.8505, - "step": 3028 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9053478021852897e-05, - "loss": 0.7514, - "step": 3029 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9052630820686604e-05, - "loss": 0.8011, - "step": 3030 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.905178325939115e-05, - "loss": 0.8063, - "step": 3031 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9050935338000254e-05, - "loss": 0.8719, - "step": 3032 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9050087056547642e-05, - "loss": 0.7729, - "step": 3033 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9049238415067064e-05, - "loss": 0.8169, - "step": 3034 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9048389413592282e-05, - "loss": 0.6798, - "step": 3035 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9047540052157066e-05, - "loss": 0.8662, - "step": 3036 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9046690330795207e-05, - "loss": 0.8072, - "step": 3037 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9045840249540513e-05, - "loss": 0.7414, - "step": 3038 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9044989808426793e-05, - "loss": 0.793, - "step": 3039 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9044139007487886e-05, - "loss": 0.7507, - "step": 3040 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9043287846757633e-05, - "loss": 0.8779, - "step": 3041 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.90424363262699e-05, - "loss": 0.7889, - "step": 3042 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.904158444605856e-05, - "loss": 0.8297, - "step": 3043 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.90407322061575e-05, - "loss": 0.8907, - "step": 3044 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9039879606600626e-05, - "loss": 0.7473, - "step": 3045 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9039026647421854e-05, - "loss": 0.8575, - "step": 3046 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9038173328655116e-05, - "loss": 0.9535, - "step": 3047 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.903731965033436e-05, - "loss": 0.91, - "step": 3048 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.903646561249355e-05, - "loss": 0.7795, - "step": 3049 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9035611215166655e-05, - "loss": 0.8726, - "step": 3050 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9034756458387667e-05, - "loss": 0.8168, - "step": 3051 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.903390134219059e-05, - "loss": 0.7736, - "step": 3052 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9033045866609443e-05, - "loss": 0.9192, - "step": 3053 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9032190031678256e-05, - "loss": 0.7931, - "step": 3054 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9031333837431075e-05, - "loss": 0.8283, - "step": 3055 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9030477283901965e-05, - "loss": 0.8859, - "step": 3056 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9029620371125e-05, - "loss": 0.7761, - "step": 3057 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.902876309913426e-05, - "loss": 0.8487, - "step": 3058 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9027905467963865e-05, - "loss": 0.8281, - "step": 3059 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.902704747764792e-05, - "loss": 0.7477, - "step": 3060 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9026189128220565e-05, - "loss": 0.8208, - "step": 3061 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9025330419715943e-05, - "loss": 0.8321, - "step": 3062 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9024471352168216e-05, - "loss": 0.9869, - "step": 3063 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.902361192561156e-05, - "loss": 0.867, - "step": 3064 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.902275214008016e-05, - "loss": 0.6858, - "step": 3065 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.9021891995608225e-05, - "loss": 0.7322, - "step": 3066 - }, - { - "epoch": 0.55, - "grad_norm": 0.0, - "learning_rate": 1.902103149222997e-05, - "loss": 0.7409, - "step": 3067 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9020170629979632e-05, - "loss": 0.7713, - "step": 3068 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.901930940889145e-05, - "loss": 0.6938, - "step": 3069 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.901844782899969e-05, - "loss": 0.8549, - "step": 3070 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9017585890338626e-05, - "loss": 0.9106, - "step": 3071 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9016723592942545e-05, - "loss": 0.642, - "step": 3072 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9015860936845757e-05, - "loss": 0.8273, - "step": 3073 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9014997922082575e-05, - "loss": 1.0133, - "step": 3074 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9014134548687328e-05, - "loss": 0.8479, - "step": 3075 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9013270816694373e-05, - "loss": 0.9001, - "step": 3076 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.901240672613806e-05, - "loss": 0.7935, - "step": 3077 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9011542277052772e-05, - "loss": 0.6594, - "step": 3078 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9010677469472888e-05, - "loss": 0.8111, - "step": 3079 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900981230343282e-05, - "loss": 0.8684, - "step": 3080 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9008946778966992e-05, - "loss": 0.785, - "step": 3081 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900808089610982e-05, - "loss": 0.7999, - "step": 3082 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900721465489576e-05, - "loss": 0.7401, - "step": 3083 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900634805535927e-05, - "loss": 0.7904, - "step": 3084 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9005481097534825e-05, - "loss": 0.7431, - "step": 3085 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9004613781456917e-05, - "loss": 0.7614, - "step": 3086 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9003746107160047e-05, - "loss": 0.8337, - "step": 3087 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900287807467873e-05, - "loss": 0.8007, - "step": 3088 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9002009684047504e-05, - "loss": 0.9548, - "step": 3089 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.9001140935300912e-05, - "loss": 1.03, - "step": 3090 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.900027182847351e-05, - "loss": 0.8628, - "step": 3091 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8999402363599875e-05, - "loss": 0.7307, - "step": 3092 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8998532540714604e-05, - "loss": 0.8792, - "step": 3093 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.899766235985229e-05, - "loss": 0.8561, - "step": 3094 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.899679182104755e-05, - "loss": 0.7674, - "step": 3095 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8995920924335024e-05, - "loss": 0.8634, - "step": 3096 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.899504966974935e-05, - "loss": 0.919, - "step": 3097 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.899417805732519e-05, - "loss": 0.8871, - "step": 3098 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.899330608709722e-05, - "loss": 0.9166, - "step": 3099 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8992433759100128e-05, - "loss": 0.8578, - "step": 3100 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8991561073368618e-05, - "loss": 0.8281, - "step": 3101 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8990688029937402e-05, - "loss": 0.9528, - "step": 3102 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8989814628841215e-05, - "loss": 0.8613, - "step": 3103 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.89889408701148e-05, - "loss": 0.7277, - "step": 3104 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898806675379292e-05, - "loss": 0.7478, - "step": 3105 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898719227991035e-05, - "loss": 0.8491, - "step": 3106 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8986317448501873e-05, - "loss": 0.7897, - "step": 3107 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898544225960229e-05, - "loss": 0.7464, - "step": 3108 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8984566713246426e-05, - "loss": 0.8812, - "step": 3109 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8983690809469105e-05, - "loss": 0.8282, - "step": 3110 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8982814548305175e-05, - "loss": 0.8481, - "step": 3111 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898193792978949e-05, - "loss": 0.7792, - "step": 3112 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898106095395693e-05, - "loss": 0.825, - "step": 3113 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.898018362084238e-05, - "loss": 0.9076, - "step": 3114 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.897930593048074e-05, - "loss": 0.676, - "step": 3115 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8978427882906925e-05, - "loss": 0.7252, - "step": 3116 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8977549478155875e-05, - "loss": 0.9016, - "step": 3117 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8976670716262524e-05, - "loss": 0.7712, - "step": 3118 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8975791597261834e-05, - "loss": 0.8811, - "step": 3119 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8974912121188778e-05, - "loss": 0.7824, - "step": 3120 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.8974032288078344e-05, - "loss": 0.9009, - "step": 3121 - }, - { - "epoch": 0.56, - "grad_norm": 0.0, - "learning_rate": 1.897315209796553e-05, - "loss": 0.7372, - "step": 3122 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8972271550885355e-05, - "loss": 0.7535, - "step": 3123 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8971390646872847e-05, - "loss": 0.7947, - "step": 3124 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.897050938596305e-05, - "loss": 0.8665, - "step": 3125 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8969627768191025e-05, - "loss": 0.8698, - "step": 3126 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8968745793591833e-05, - "loss": 0.7289, - "step": 3127 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8967863462200577e-05, - "loss": 0.7377, - "step": 3128 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.896698077405235e-05, - "loss": 1.0052, - "step": 3129 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.896609772918226e-05, - "loss": 0.8464, - "step": 3130 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.896521432762545e-05, - "loss": 0.9219, - "step": 3131 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8964330569417055e-05, - "loss": 0.8725, - "step": 3132 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8963446454592227e-05, - "loss": 0.722, - "step": 3133 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.896256198318615e-05, - "loss": 0.8783, - "step": 3134 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8961677155234e-05, - "loss": 0.7182, - "step": 3135 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.896079197077098e-05, - "loss": 0.7161, - "step": 3136 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8959906429832308e-05, - "loss": 0.8681, - "step": 3137 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895902053245321e-05, - "loss": 0.8207, - "step": 3138 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8958134278668924e-05, - "loss": 0.8481, - "step": 3139 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895724766851471e-05, - "loss": 0.6999, - "step": 3140 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895636070202584e-05, - "loss": 0.7572, - "step": 3141 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.89554733792376e-05, - "loss": 0.6861, - "step": 3142 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8954585700185287e-05, - "loss": 0.9153, - "step": 3143 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895369766490421e-05, - "loss": 0.8101, - "step": 3144 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8952809273429707e-05, - "loss": 1.016, - "step": 3145 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895192052579711e-05, - "loss": 0.9423, - "step": 3146 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8951031422041786e-05, - "loss": 0.7611, - "step": 3147 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.895014196219909e-05, - "loss": 0.8099, - "step": 3148 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.894925214630442e-05, - "loss": 0.8379, - "step": 3149 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8948361974393166e-05, - "loss": 0.9492, - "step": 3150 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8947471446500745e-05, - "loss": 0.8694, - "step": 3151 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.894658056266258e-05, - "loss": 0.9028, - "step": 3152 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8945689322914113e-05, - "loss": 0.7652, - "step": 3153 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8944797727290804e-05, - "loss": 0.8327, - "step": 3154 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8943905775828116e-05, - "loss": 0.7569, - "step": 3155 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8943013468561534e-05, - "loss": 0.7809, - "step": 3156 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8942120805526555e-05, - "loss": 0.8512, - "step": 3157 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.894122778675869e-05, - "loss": 0.8021, - "step": 3158 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8940334412293472e-05, - "loss": 0.6776, - "step": 3159 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.893944068216643e-05, - "loss": 0.7571, - "step": 3160 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8938546596413128e-05, - "loss": 0.8294, - "step": 3161 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8937652155069126e-05, - "loss": 0.7494, - "step": 3162 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8936757358170013e-05, - "loss": 0.7257, - "step": 3163 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8935862205751383e-05, - "loss": 0.7417, - "step": 3164 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8934966697848844e-05, - "loss": 0.9135, - "step": 3165 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8934070834498025e-05, - "loss": 0.8189, - "step": 3166 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.893317461573456e-05, - "loss": 0.8483, - "step": 3167 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8932278041594113e-05, - "loss": 0.8355, - "step": 3168 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8931381112112337e-05, - "loss": 0.7972, - "step": 3169 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8930483827324925e-05, - "loss": 0.7983, - "step": 3170 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8929586187267564e-05, - "loss": 0.7463, - "step": 3171 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8928688191975966e-05, - "loss": 0.8319, - "step": 3172 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.892778984148586e-05, - "loss": 0.8408, - "step": 3173 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.892689113583298e-05, - "loss": 0.6901, - "step": 3174 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.8925992075053075e-05, - "loss": 0.8794, - "step": 3175 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.892509265918191e-05, - "loss": 0.8274, - "step": 3176 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.892419288825528e-05, - "loss": 0.8334, - "step": 3177 - }, - { - "epoch": 0.57, - "grad_norm": 0.0, - "learning_rate": 1.892329276230896e-05, - "loss": 0.8345, - "step": 3178 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8922392281378773e-05, - "loss": 0.853, - "step": 3179 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8921491445500533e-05, - "loss": 0.8527, - "step": 3180 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8920590254710084e-05, - "loss": 0.8079, - "step": 3181 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.891968870904327e-05, - "loss": 0.808, - "step": 3182 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8918786808535955e-05, - "loss": 0.8511, - "step": 3183 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8917884553224025e-05, - "loss": 0.8399, - "step": 3184 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8916981943143374e-05, - "loss": 0.9176, - "step": 3185 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8916078978329905e-05, - "loss": 0.7193, - "step": 3186 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8915175658819536e-05, - "loss": 0.8741, - "step": 3187 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.891427198464821e-05, - "loss": 0.8769, - "step": 3188 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.891336795585187e-05, - "loss": 0.8285, - "step": 3189 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8912463572466487e-05, - "loss": 0.7017, - "step": 3190 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8911558834528033e-05, - "loss": 0.8223, - "step": 3191 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8910653742072504e-05, - "loss": 0.7935, - "step": 3192 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8909748295135902e-05, - "loss": 0.9145, - "step": 3193 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.890884249375425e-05, - "loss": 0.8524, - "step": 3194 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8907936337963582e-05, - "loss": 0.8723, - "step": 3195 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.890702982779995e-05, - "loss": 0.7779, - "step": 3196 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8906122963299405e-05, - "loss": 0.8184, - "step": 3197 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.890521574449804e-05, - "loss": 0.7753, - "step": 3198 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.890430817143193e-05, - "loss": 0.932, - "step": 3199 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8903400244137192e-05, - "loss": 0.7717, - "step": 3200 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.890249196264994e-05, - "loss": 0.8294, - "step": 3201 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8901583327006304e-05, - "loss": 0.7755, - "step": 3202 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8900674337242437e-05, - "loss": 0.791, - "step": 3203 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8899764993394494e-05, - "loss": 0.8639, - "step": 3204 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8898855295498655e-05, - "loss": 0.7714, - "step": 3205 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8897945243591108e-05, - "loss": 0.8199, - "step": 3206 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8897034837708058e-05, - "loss": 0.8284, - "step": 3207 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8896124077885715e-05, - "loss": 0.9101, - "step": 3208 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8895212964160322e-05, - "loss": 0.7585, - "step": 3209 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8894301496568118e-05, - "loss": 0.8277, - "step": 3210 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8893389675145362e-05, - "loss": 0.8792, - "step": 3211 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.889247749992833e-05, - "loss": 0.8278, - "step": 3212 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.889156497095331e-05, - "loss": 0.7209, - "step": 3213 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.88906520882566e-05, - "loss": 0.7612, - "step": 3214 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8889738851874524e-05, - "loss": 0.7455, - "step": 3215 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8888825261843403e-05, - "loss": 0.8227, - "step": 3216 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.888791131819959e-05, - "loss": 0.7466, - "step": 3217 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.888699702097944e-05, - "loss": 0.7076, - "step": 3218 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.888608237021932e-05, - "loss": 0.7703, - "step": 3219 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.888516736595562e-05, - "loss": 0.9731, - "step": 3220 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8884252008224748e-05, - "loss": 0.7625, - "step": 3221 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8883336297063106e-05, - "loss": 0.8828, - "step": 3222 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8882420232507127e-05, - "loss": 0.8639, - "step": 3223 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8881503814593256e-05, - "loss": 0.8544, - "step": 3224 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8880587043357947e-05, - "loss": 0.7897, - "step": 3225 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.887966991883768e-05, - "loss": 0.7716, - "step": 3226 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8878752441068922e-05, - "loss": 0.6651, - "step": 3227 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8877834610088186e-05, - "loss": 0.8024, - "step": 3228 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.887691642593198e-05, - "loss": 0.875, - "step": 3229 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8875997888636833e-05, - "loss": 0.9699, - "step": 3230 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8875078998239284e-05, - "loss": 0.7475, - "step": 3231 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8874159754775888e-05, - "loss": 0.8017, - "step": 3232 - }, - { - "epoch": 0.58, - "grad_norm": 0.0, - "learning_rate": 1.8873240158283214e-05, - "loss": 0.8384, - "step": 3233 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8872320208797847e-05, - "loss": 0.8773, - "step": 3234 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.887139990635638e-05, - "loss": 0.8153, - "step": 3235 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8870479250995433e-05, - "loss": 0.7593, - "step": 3236 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.886955824275162e-05, - "loss": 0.7452, - "step": 3237 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8868636881661588e-05, - "loss": 0.8312, - "step": 3238 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8867715167761987e-05, - "loss": 0.903, - "step": 3239 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.886679310108949e-05, - "loss": 0.682, - "step": 3240 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8865870681680765e-05, - "loss": 0.7348, - "step": 3241 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8864947909572525e-05, - "loss": 0.6991, - "step": 3242 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8864024784801467e-05, - "loss": 0.8164, - "step": 3243 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8863101307404318e-05, - "loss": 0.8856, - "step": 3244 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8862177477417812e-05, - "loss": 0.8032, - "step": 3245 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8861253294878708e-05, - "loss": 0.8257, - "step": 3246 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.886032875982377e-05, - "loss": 0.7932, - "step": 3247 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.885940387228977e-05, - "loss": 0.9111, - "step": 3248 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8858478632313513e-05, - "loss": 0.9183, - "step": 3249 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8857553039931798e-05, - "loss": 0.8436, - "step": 3250 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8856627095181447e-05, - "loss": 0.8123, - "step": 3251 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8855700798099298e-05, - "loss": 0.8034, - "step": 3252 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8854774148722204e-05, - "loss": 0.9456, - "step": 3253 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8853847147087025e-05, - "loss": 0.7126, - "step": 3254 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8852919793230637e-05, - "loss": 0.7756, - "step": 3255 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8851992087189933e-05, - "loss": 0.7898, - "step": 3256 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8851064029001823e-05, - "loss": 0.7143, - "step": 3257 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.885013561870322e-05, - "loss": 0.8029, - "step": 3258 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.884920685633106e-05, - "loss": 0.8074, - "step": 3259 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8848277741922295e-05, - "loss": 0.7201, - "step": 3260 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8847348275513885e-05, - "loss": 0.8327, - "step": 3261 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.88464184571428e-05, - "loss": 0.7552, - "step": 3262 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8845488286846035e-05, - "loss": 0.7786, - "step": 3263 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8844557764660594e-05, - "loss": 0.7581, - "step": 3264 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8843626890623494e-05, - "loss": 0.7992, - "step": 3265 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8842695664771763e-05, - "loss": 0.8881, - "step": 3266 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8841764087142453e-05, - "loss": 0.8392, - "step": 3267 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.884083215777262e-05, - "loss": 0.8091, - "step": 3268 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8839899876699337e-05, - "loss": 0.8142, - "step": 3269 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8838967243959695e-05, - "loss": 0.8605, - "step": 3270 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8838034259590795e-05, - "loss": 0.8473, - "step": 3271 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.883710092362975e-05, - "loss": 0.8771, - "step": 3272 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.883616723611369e-05, - "loss": 0.82, - "step": 3273 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8835233197079765e-05, - "loss": 0.7404, - "step": 3274 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8834298806565126e-05, - "loss": 0.8366, - "step": 3275 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8833364064606943e-05, - "loss": 0.7383, - "step": 3276 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.883242897124241e-05, - "loss": 0.8191, - "step": 3277 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.883149352650872e-05, - "loss": 0.8567, - "step": 3278 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.883055773044309e-05, - "loss": 0.858, - "step": 3279 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8829621583082743e-05, - "loss": 0.8767, - "step": 3280 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8828685084464923e-05, - "loss": 0.775, - "step": 3281 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8827748234626887e-05, - "loss": 0.8007, - "step": 3282 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8826811033605904e-05, - "loss": 0.862, - "step": 3283 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8825873481439255e-05, - "loss": 0.7918, - "step": 3284 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.882493557816424e-05, - "loss": 0.8317, - "step": 3285 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.882399732381817e-05, - "loss": 0.7517, - "step": 3286 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8823058718438375e-05, - "loss": 0.9044, - "step": 3287 - }, - { - "epoch": 0.59, - "grad_norm": 0.0, - "learning_rate": 1.8822119762062182e-05, - "loss": 0.8961, - "step": 3288 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.882118045472696e-05, - "loss": 0.7045, - "step": 3289 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.882024079647006e-05, - "loss": 0.8824, - "step": 3290 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8819300787328872e-05, - "loss": 0.9184, - "step": 3291 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8818360427340793e-05, - "loss": 0.8402, - "step": 3292 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8817419716543228e-05, - "loss": 0.7919, - "step": 3293 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8816478654973605e-05, - "loss": 0.7911, - "step": 3294 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8815537242669353e-05, - "loss": 0.8246, - "step": 3295 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.881459547966793e-05, - "loss": 0.7227, - "step": 3296 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.88136533660068e-05, - "loss": 0.8226, - "step": 3297 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.881271090172344e-05, - "loss": 0.8796, - "step": 3298 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8811768086855342e-05, - "loss": 0.8134, - "step": 3299 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8810824921440013e-05, - "loss": 0.798, - "step": 3300 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8809881405514976e-05, - "loss": 0.7409, - "step": 3301 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8808937539117765e-05, - "loss": 0.8266, - "step": 3302 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.880799332228593e-05, - "loss": 0.8408, - "step": 3303 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.880704875505703e-05, - "loss": 0.8528, - "step": 3304 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8806103837468642e-05, - "loss": 0.8278, - "step": 3305 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8805158569558356e-05, - "loss": 0.7977, - "step": 3306 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8804212951363782e-05, - "loss": 0.7332, - "step": 3307 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.880326698292253e-05, - "loss": 0.8085, - "step": 3308 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8802320664272242e-05, - "loss": 0.9391, - "step": 3309 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8801373995450557e-05, - "loss": 0.8683, - "step": 3310 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8800426976495135e-05, - "loss": 0.7442, - "step": 3311 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8799479607443652e-05, - "loss": 0.8246, - "step": 3312 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8798531888333797e-05, - "loss": 0.8636, - "step": 3313 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.879758381920327e-05, - "loss": 0.7332, - "step": 3314 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.879663540008979e-05, - "loss": 0.7863, - "step": 3315 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.879568663103108e-05, - "loss": 0.8236, - "step": 3316 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.879473751206489e-05, - "loss": 0.9141, - "step": 3317 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8793788043228977e-05, - "loss": 0.7545, - "step": 3318 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.879283822456111e-05, - "loss": 0.8473, - "step": 3319 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8791888056099076e-05, - "loss": 0.8108, - "step": 3320 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8790937537880675e-05, - "loss": 0.7542, - "step": 3321 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8789986669943714e-05, - "loss": 0.6886, - "step": 3322 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.878903545232603e-05, - "loss": 0.684, - "step": 3323 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.878808388506546e-05, - "loss": 0.7149, - "step": 3324 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8787131968199855e-05, - "loss": 0.8316, - "step": 3325 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8786179701767087e-05, - "loss": 0.8371, - "step": 3326 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.878522708580504e-05, - "loss": 0.868, - "step": 3327 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8784274120351613e-05, - "loss": 0.7102, - "step": 3328 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.878332080544471e-05, - "loss": 0.7862, - "step": 3329 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8782367141122257e-05, - "loss": 0.8601, - "step": 3330 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8781413127422194e-05, - "loss": 0.7372, - "step": 3331 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8780458764382477e-05, - "loss": 0.9206, - "step": 3332 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8779504052041064e-05, - "loss": 0.8645, - "step": 3333 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.877854899043594e-05, - "loss": 0.7138, - "step": 3334 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8777593579605105e-05, - "loss": 0.5868, - "step": 3335 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8776637819586556e-05, - "loss": 0.8129, - "step": 3336 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8775681710418322e-05, - "loss": 0.9216, - "step": 3337 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8774725252138432e-05, - "loss": 0.7673, - "step": 3338 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.877376844478494e-05, - "loss": 0.8544, - "step": 3339 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.8772811288395912e-05, - "loss": 0.9071, - "step": 3340 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.877185378300942e-05, - "loss": 0.898, - "step": 3341 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.877089592866356e-05, - "loss": 0.7937, - "step": 3342 - }, - { - "epoch": 0.6, - "grad_norm": 0.0, - "learning_rate": 1.876993772539643e-05, - "loss": 0.931, - "step": 3343 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8768979173246154e-05, - "loss": 0.8792, - "step": 3344 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.876802027225087e-05, - "loss": 0.92, - "step": 3345 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8767061022448713e-05, - "loss": 0.8138, - "step": 3346 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8766101423877858e-05, - "loss": 0.9104, - "step": 3347 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.876514147657646e-05, - "loss": 0.9141, - "step": 3348 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8764181180582725e-05, - "loss": 0.797, - "step": 3349 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.876322053593485e-05, - "loss": 0.7662, - "step": 3350 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8762259542671047e-05, - "loss": 0.8793, - "step": 3351 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.876129820082955e-05, - "loss": 0.8453, - "step": 3352 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.87603365104486e-05, - "loss": 0.7739, - "step": 3353 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8759374471566458e-05, - "loss": 0.6829, - "step": 3354 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8758412084221393e-05, - "loss": 0.9592, - "step": 3355 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.875744934845169e-05, - "loss": 0.6898, - "step": 3356 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8756486264295653e-05, - "loss": 0.9244, - "step": 3357 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.875552283179159e-05, - "loss": 0.7809, - "step": 3358 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.875455905097783e-05, - "loss": 0.8038, - "step": 3359 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8753594921892713e-05, - "loss": 0.8469, - "step": 3360 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8752630444574596e-05, - "loss": 0.869, - "step": 3361 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8751665619061846e-05, - "loss": 0.847, - "step": 3362 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.875070044539284e-05, - "loss": 0.9155, - "step": 3363 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8749734923605983e-05, - "loss": 0.7652, - "step": 3364 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8748769053739684e-05, - "loss": 0.8242, - "step": 3365 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8747802835832362e-05, - "loss": 0.9049, - "step": 3366 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8746836269922458e-05, - "loss": 0.798, - "step": 3367 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.874586935604842e-05, - "loss": 0.9221, - "step": 3368 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8744902094248722e-05, - "loss": 0.8281, - "step": 3369 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8743934484561835e-05, - "loss": 0.8698, - "step": 3370 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8742966527026255e-05, - "loss": 0.8689, - "step": 3371 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8741998221680488e-05, - "loss": 0.7435, - "step": 3372 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8741029568563054e-05, - "loss": 0.9424, - "step": 3373 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8740060567712497e-05, - "loss": 0.8811, - "step": 3374 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8739091219167353e-05, - "loss": 0.9378, - "step": 3375 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8738121522966192e-05, - "loss": 0.8479, - "step": 3376 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8737151479147582e-05, - "loss": 0.7896, - "step": 3377 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8736181087750123e-05, - "loss": 1.0118, - "step": 3378 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8735210348812413e-05, - "loss": 0.7939, - "step": 3379 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8734239262373074e-05, - "loss": 0.9249, - "step": 3380 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.873326782847073e-05, - "loss": 0.8647, - "step": 3381 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8732296047144034e-05, - "loss": 0.9331, - "step": 3382 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8731323918431642e-05, - "loss": 0.7859, - "step": 3383 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8730351442372225e-05, - "loss": 0.9625, - "step": 3384 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8729378619004472e-05, - "loss": 0.7651, - "step": 3385 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8728405448367084e-05, - "loss": 0.764, - "step": 3386 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8727431930498775e-05, - "loss": 0.8388, - "step": 3387 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8726458065438272e-05, - "loss": 0.7763, - "step": 3388 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.872548385322432e-05, - "loss": 0.7588, - "step": 3389 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.872450929389567e-05, - "loss": 0.9007, - "step": 3390 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8723534387491096e-05, - "loss": 0.7469, - "step": 3391 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8722559134049378e-05, - "loss": 0.9022, - "step": 3392 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8721583533609317e-05, - "loss": 0.7607, - "step": 3393 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.872060758620972e-05, - "loss": 0.9588, - "step": 3394 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8719631291889418e-05, - "loss": 0.7201, - "step": 3395 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8718654650687242e-05, - "loss": 0.7808, - "step": 3396 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.871767766264205e-05, - "loss": 0.9184, - "step": 3397 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8716700327792704e-05, - "loss": 0.7913, - "step": 3398 - }, - { - "epoch": 0.61, - "grad_norm": 0.0, - "learning_rate": 1.8715722646178092e-05, - "loss": 0.8136, - "step": 3399 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8714744617837097e-05, - "loss": 0.8274, - "step": 3400 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8713766242808632e-05, - "loss": 0.7615, - "step": 3401 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8712787521131623e-05, - "loss": 0.7346, - "step": 3402 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8711808452844994e-05, - "loss": 0.7712, - "step": 3403 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8710829037987706e-05, - "loss": 0.9367, - "step": 3404 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8709849276598713e-05, - "loss": 0.7757, - "step": 3405 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8708869168716998e-05, - "loss": 0.7513, - "step": 3406 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8707888714381545e-05, - "loss": 0.8205, - "step": 3407 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.870690791363136e-05, - "loss": 0.9091, - "step": 3408 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.870592676650547e-05, - "loss": 0.8698, - "step": 3409 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8704945273042894e-05, - "loss": 0.8183, - "step": 3410 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.870396343328268e-05, - "loss": 0.7642, - "step": 3411 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8702981247263895e-05, - "loss": 0.7867, - "step": 3412 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8701998715025603e-05, - "loss": 0.8853, - "step": 3413 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.87010158366069e-05, - "loss": 0.7764, - "step": 3414 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.870003261204688e-05, - "loss": 0.8162, - "step": 3415 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8699049041384657e-05, - "loss": 0.7532, - "step": 3416 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.869806512465936e-05, - "loss": 0.7615, - "step": 3417 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8697080861910132e-05, - "loss": 0.7676, - "step": 3418 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8696096253176126e-05, - "loss": 0.7055, - "step": 3419 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.869511129849652e-05, - "loss": 0.7256, - "step": 3420 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8694125997910487e-05, - "loss": 0.7562, - "step": 3421 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8693140351457228e-05, - "loss": 0.8285, - "step": 3422 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8692154359175957e-05, - "loss": 0.8294, - "step": 3423 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8691168021105893e-05, - "loss": 0.8156, - "step": 3424 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8690181337286274e-05, - "loss": 0.7961, - "step": 3425 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8689194307756354e-05, - "loss": 0.7496, - "step": 3426 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8688206932555402e-05, - "loss": 0.8591, - "step": 3427 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8687219211722693e-05, - "loss": 0.8014, - "step": 3428 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8686231145297523e-05, - "loss": 0.7707, - "step": 3429 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8685242733319198e-05, - "loss": 0.8205, - "step": 3430 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8684253975827036e-05, - "loss": 0.8167, - "step": 3431 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8683264872860377e-05, - "loss": 0.879, - "step": 3432 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8682275424458565e-05, - "loss": 0.8066, - "step": 3433 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8681285630660962e-05, - "loss": 0.7806, - "step": 3434 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8680295491506942e-05, - "loss": 0.7494, - "step": 3435 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8679305007035902e-05, - "loss": 0.8174, - "step": 3436 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.867831417728724e-05, - "loss": 0.8327, - "step": 3437 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8677323002300373e-05, - "loss": 0.8015, - "step": 3438 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.867633148211473e-05, - "loss": 0.9239, - "step": 3439 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.867533961676976e-05, - "loss": 0.8227, - "step": 3440 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8674347406304914e-05, - "loss": 0.8385, - "step": 3441 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.867335485075967e-05, - "loss": 0.8305, - "step": 3442 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8672361950173514e-05, - "loss": 0.9087, - "step": 3443 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.867136870458594e-05, - "loss": 0.8295, - "step": 3444 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8670375114036464e-05, - "loss": 0.8309, - "step": 3445 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8669381178564617e-05, - "loss": 0.6617, - "step": 3446 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8668386898209933e-05, - "loss": 0.9839, - "step": 3447 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8667392273011965e-05, - "loss": 0.9479, - "step": 3448 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.866639730301029e-05, - "loss": 0.8389, - "step": 3449 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8665401988244482e-05, - "loss": 0.7209, - "step": 3450 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8664406328754138e-05, - "loss": 0.8104, - "step": 3451 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8663410324578867e-05, - "loss": 0.7979, - "step": 3452 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8662413975758287e-05, - "loss": 0.7414, - "step": 3453 - }, - { - "epoch": 0.62, - "grad_norm": 0.0, - "learning_rate": 1.8661417282332047e-05, - "loss": 0.8026, - "step": 3454 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8660420244339786e-05, - "loss": 0.9417, - "step": 3455 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8659422861821173e-05, - "loss": 0.8092, - "step": 3456 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8658425134815884e-05, - "loss": 0.7584, - "step": 3457 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.865742706336361e-05, - "loss": 0.8485, - "step": 3458 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.865642864750406e-05, - "loss": 0.7684, - "step": 3459 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.865542988727694e-05, - "loss": 0.7798, - "step": 3460 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8654430782722e-05, - "loss": 0.9083, - "step": 3461 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8653431333878976e-05, - "loss": 0.815, - "step": 3462 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.865243154078763e-05, - "loss": 0.8801, - "step": 3463 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.865143140348773e-05, - "loss": 0.7886, - "step": 3464 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8650430922019072e-05, - "loss": 0.752, - "step": 3465 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8649430096421454e-05, - "loss": 0.6797, - "step": 3466 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8648428926734684e-05, - "loss": 0.7913, - "step": 3467 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.86474274129986e-05, - "loss": 0.8655, - "step": 3468 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8646425555253038e-05, - "loss": 0.7876, - "step": 3469 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8645423353537856e-05, - "loss": 0.761, - "step": 3470 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8644420807892922e-05, - "loss": 0.7985, - "step": 3471 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.864341791835812e-05, - "loss": 0.766, - "step": 3472 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8642414684973347e-05, - "loss": 0.8781, - "step": 3473 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8641411107778513e-05, - "loss": 0.8915, - "step": 3474 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.864040718681354e-05, - "loss": 0.7483, - "step": 3475 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.863940292211837e-05, - "loss": 0.8062, - "step": 3476 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.863839831373295e-05, - "loss": 0.7379, - "step": 3477 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8637393361697244e-05, - "loss": 0.8108, - "step": 3478 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8636388066051237e-05, - "loss": 0.8334, - "step": 3479 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8635382426834914e-05, - "loss": 0.7978, - "step": 3480 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8634376444088288e-05, - "loss": 0.9987, - "step": 3481 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8633370117851375e-05, - "loss": 0.6799, - "step": 3482 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8632363448164208e-05, - "loss": 0.6419, - "step": 3483 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8631356435066837e-05, - "loss": 0.6812, - "step": 3484 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.863034907859932e-05, - "loss": 0.9122, - "step": 3485 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.862934137880173e-05, - "loss": 0.7271, - "step": 3486 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8628333335714156e-05, - "loss": 0.917, - "step": 3487 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8627324949376705e-05, - "loss": 0.744, - "step": 3488 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8626316219829487e-05, - "loss": 0.7489, - "step": 3489 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8625307147112633e-05, - "loss": 0.7592, - "step": 3490 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.862429773126628e-05, - "loss": 0.8763, - "step": 3491 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8623287972330594e-05, - "loss": 0.9851, - "step": 3492 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8622277870345737e-05, - "loss": 0.9286, - "step": 3493 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8621267425351896e-05, - "loss": 0.9165, - "step": 3494 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.862025663738927e-05, - "loss": 0.7084, - "step": 3495 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8619245506498065e-05, - "loss": 0.7844, - "step": 3496 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8618234032718508e-05, - "loss": 0.822, - "step": 3497 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.861722221609084e-05, - "loss": 0.8845, - "step": 3498 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8616210056655307e-05, - "loss": 0.8501, - "step": 3499 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.861519755445218e-05, - "loss": 0.7795, - "step": 3500 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8614184709521734e-05, - "loss": 0.7685, - "step": 3501 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8613171521904264e-05, - "loss": 0.7574, - "step": 3502 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8612157991640076e-05, - "loss": 0.7651, - "step": 3503 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8611144118769492e-05, - "loss": 0.8003, - "step": 3504 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8610129903332838e-05, - "loss": 0.8511, - "step": 3505 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.860911534537047e-05, - "loss": 0.8941, - "step": 3506 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8608100444922743e-05, - "loss": 0.7692, - "step": 3507 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.860708520203004e-05, - "loss": 0.9803, - "step": 3508 - }, - { - "epoch": 0.63, - "grad_norm": 0.0, - "learning_rate": 1.8606069616732732e-05, - "loss": 0.7583, - "step": 3509 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.860505368907124e-05, - "loss": 0.8198, - "step": 3510 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.860403741908597e-05, - "loss": 0.9237, - "step": 3511 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.860302080681735e-05, - "loss": 0.7288, - "step": 3512 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8602003852305826e-05, - "loss": 0.8825, - "step": 3513 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.860098655559185e-05, - "loss": 0.8785, - "step": 3514 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85999689167159e-05, - "loss": 0.6907, - "step": 3515 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8598950935718448e-05, - "loss": 0.7357, - "step": 3516 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.859793261264e-05, - "loss": 0.838, - "step": 3517 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8596913947521064e-05, - "loss": 0.8308, - "step": 3518 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.859589494040216e-05, - "loss": 0.7638, - "step": 3519 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8594875591323833e-05, - "loss": 0.8734, - "step": 3520 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8593855900326633e-05, - "loss": 0.9015, - "step": 3521 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.859283586745112e-05, - "loss": 0.9498, - "step": 3522 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8591815492737874e-05, - "loss": 0.8334, - "step": 3523 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8590794776227493e-05, - "loss": 0.8459, - "step": 3524 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8589773717960578e-05, - "loss": 0.8367, - "step": 3525 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.858875231797775e-05, - "loss": 0.8159, - "step": 3526 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.858773057631964e-05, - "loss": 0.7615, - "step": 3527 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8586708493026895e-05, - "loss": 0.7771, - "step": 3528 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8585686068140178e-05, - "loss": 0.7175, - "step": 3529 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8584663301700163e-05, - "loss": 0.8772, - "step": 3530 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8583640193747534e-05, - "loss": 0.8538, - "step": 3531 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8582616744322994e-05, - "loss": 0.9846, - "step": 3532 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8581592953467255e-05, - "loss": 0.8822, - "step": 3533 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8580568821221047e-05, - "loss": 0.929, - "step": 3534 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8579544347625114e-05, - "loss": 0.8784, - "step": 3535 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8578519532720207e-05, - "loss": 0.8533, - "step": 3536 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85774943765471e-05, - "loss": 0.6288, - "step": 3537 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.857646887914657e-05, - "loss": 0.9134, - "step": 3538 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8575443040559416e-05, - "loss": 0.8061, - "step": 3539 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8574416860826452e-05, - "loss": 0.7733, - "step": 3540 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.857339033998849e-05, - "loss": 0.7806, - "step": 3541 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.857236347808638e-05, - "loss": 0.7325, - "step": 3542 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.857133627516096e-05, - "loss": 0.7786, - "step": 3543 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85703087312531e-05, - "loss": 0.8307, - "step": 3544 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.856928084640368e-05, - "loss": 0.8834, - "step": 3545 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8568252620653585e-05, - "loss": 0.7812, - "step": 3546 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8567224054043724e-05, - "loss": 0.7496, - "step": 3547 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8566195146615015e-05, - "loss": 0.881, - "step": 3548 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8565165898408383e-05, - "loss": 0.685, - "step": 3549 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.856413630946478e-05, - "loss": 1.0261, - "step": 3550 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8563106379825167e-05, - "loss": 0.8469, - "step": 3551 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8562076109530512e-05, - "loss": 0.7807, - "step": 3552 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85610454986218e-05, - "loss": 0.8907, - "step": 3553 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8560014547140035e-05, - "loss": 0.8251, - "step": 3554 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8558983255126225e-05, - "loss": 0.689, - "step": 3555 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85579516226214e-05, - "loss": 0.7612, - "step": 3556 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8556919649666597e-05, - "loss": 0.7231, - "step": 3557 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8555887336302872e-05, - "loss": 0.8301, - "step": 3558 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8554854682571294e-05, - "loss": 0.6261, - "step": 3559 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.855382168851294e-05, - "loss": 0.7847, - "step": 3560 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8552788354168907e-05, - "loss": 0.8208, - "step": 3561 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.85517546795803e-05, - "loss": 0.7317, - "step": 3562 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8550720664788242e-05, - "loss": 0.7884, - "step": 3563 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8549686309833866e-05, - "loss": 0.8964, - "step": 3564 - }, - { - "epoch": 0.64, - "grad_norm": 0.0, - "learning_rate": 1.8548651614758322e-05, - "loss": 0.8585, - "step": 3565 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8547616579602778e-05, - "loss": 0.7637, - "step": 3566 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8546581204408397e-05, - "loss": 0.7951, - "step": 3567 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8545545489216378e-05, - "loss": 0.7292, - "step": 3568 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.854450943406792e-05, - "loss": 0.844, - "step": 3569 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.854347303900424e-05, - "loss": 0.7488, - "step": 3570 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.854243630406656e-05, - "loss": 0.8228, - "step": 3571 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8541399229296138e-05, - "loss": 0.7775, - "step": 3572 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.854036181473422e-05, - "loss": 0.9481, - "step": 3573 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.853932406042208e-05, - "loss": 0.8889, - "step": 3574 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8538285966400995e-05, - "loss": 0.8688, - "step": 3575 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.853724753271227e-05, - "loss": 0.6675, - "step": 3576 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8536208759397213e-05, - "loss": 0.7991, - "step": 3577 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.853516964649715e-05, - "loss": 0.8442, - "step": 3578 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8534130194053417e-05, - "loss": 0.8361, - "step": 3579 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.853309040210736e-05, - "loss": 0.8324, - "step": 3580 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8532050270700354e-05, - "loss": 0.8014, - "step": 3581 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8531009799873772e-05, - "loss": 0.828, - "step": 3582 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8529968989669002e-05, - "loss": 0.8651, - "step": 3583 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.852892784012746e-05, - "loss": 0.8667, - "step": 3584 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.852788635129055e-05, - "loss": 0.8272, - "step": 3585 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8526844523199713e-05, - "loss": 0.7568, - "step": 3586 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8525802355896397e-05, - "loss": 0.8751, - "step": 3587 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8524759849422055e-05, - "loss": 0.8893, - "step": 3588 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8523717003818164e-05, - "loss": 0.8035, - "step": 3589 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8522673819126208e-05, - "loss": 0.8928, - "step": 3590 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8521630295387688e-05, - "loss": 0.7156, - "step": 3591 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8520586432644116e-05, - "loss": 0.835, - "step": 3592 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851954223093702e-05, - "loss": 0.8191, - "step": 3593 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851849769030794e-05, - "loss": 0.9147, - "step": 3594 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8517452810798425e-05, - "loss": 0.8685, - "step": 3595 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851640759245005e-05, - "loss": 0.685, - "step": 3596 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851536203530439e-05, - "loss": 0.7994, - "step": 3597 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851431613940304e-05, - "loss": 0.8515, - "step": 3598 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8513269904787607e-05, - "loss": 0.8461, - "step": 3599 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.851222333149972e-05, - "loss": 0.8321, - "step": 3600 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8511176419580997e-05, - "loss": 0.7598, - "step": 3601 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8510129169073103e-05, - "loss": 0.8929, - "step": 3602 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8509081580017686e-05, - "loss": 0.8822, - "step": 3603 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8508033652456428e-05, - "loss": 0.8004, - "step": 3604 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.850698538643102e-05, - "loss": 0.8511, - "step": 3605 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.850593678198316e-05, - "loss": 0.7494, - "step": 3606 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.850488783915456e-05, - "loss": 0.9288, - "step": 3607 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8503838557986953e-05, - "loss": 0.7242, - "step": 3608 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.850278893852208e-05, - "loss": 0.9006, - "step": 3609 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.85017389808017e-05, - "loss": 0.8001, - "step": 3610 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8500688684867576e-05, - "loss": 0.7464, - "step": 3611 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8499638050761494e-05, - "loss": 0.8097, - "step": 3612 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.849858707852525e-05, - "loss": 0.7553, - "step": 3613 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8497535768200654e-05, - "loss": 0.8357, - "step": 3614 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8496484119829525e-05, - "loss": 0.7617, - "step": 3615 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8495432133453707e-05, - "loss": 0.7854, - "step": 3616 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8494379809115043e-05, - "loss": 0.627, - "step": 3617 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8493327146855396e-05, - "loss": 0.7761, - "step": 3618 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.8492274146716648e-05, - "loss": 0.771, - "step": 3619 - }, - { - "epoch": 0.65, - "grad_norm": 0.0, - "learning_rate": 1.849122080874069e-05, - "loss": 0.8527, - "step": 3620 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8490167132969415e-05, - "loss": 0.7917, - "step": 3621 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8489113119444752e-05, - "loss": 0.8214, - "step": 3622 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8488058768208622e-05, - "loss": 0.8443, - "step": 3623 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8487004079302977e-05, - "loss": 0.7624, - "step": 3624 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.848594905276977e-05, - "loss": 0.7292, - "step": 3625 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.848489368865097e-05, - "loss": 0.8246, - "step": 3626 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8483837986988566e-05, - "loss": 0.8873, - "step": 3627 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8482781947824552e-05, - "loss": 0.807, - "step": 3628 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8481725571200942e-05, - "loss": 0.8609, - "step": 3629 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.848066885715976e-05, - "loss": 0.9181, - "step": 3630 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8479611805743037e-05, - "loss": 0.8329, - "step": 3631 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8478554416992836e-05, - "loss": 0.862, - "step": 3632 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8477496690951212e-05, - "loss": 0.7447, - "step": 3633 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.847643862766025e-05, - "loss": 0.7513, - "step": 3634 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8475380227162037e-05, - "loss": 0.9237, - "step": 3635 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8474321489498678e-05, - "loss": 0.7504, - "step": 3636 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8473262414712295e-05, - "loss": 0.8875, - "step": 3637 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8472203002845015e-05, - "loss": 0.6982, - "step": 3638 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.847114325393899e-05, - "loss": 0.7807, - "step": 3639 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.847008316803637e-05, - "loss": 0.7019, - "step": 3640 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8469022745179335e-05, - "loss": 0.8206, - "step": 3641 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8467961985410064e-05, - "loss": 0.7192, - "step": 3642 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.846690088877076e-05, - "loss": 0.8532, - "step": 3643 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8465839455303633e-05, - "loss": 0.7512, - "step": 3644 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8464777685050914e-05, - "loss": 0.9186, - "step": 3645 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.846371557805483e-05, - "loss": 0.7859, - "step": 3646 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.846265313435765e-05, - "loss": 0.8026, - "step": 3647 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8461590354001627e-05, - "loss": 0.7941, - "step": 3648 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8460527237029042e-05, - "loss": 0.7571, - "step": 3649 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8459463783482193e-05, - "loss": 0.7699, - "step": 3650 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.845839999340338e-05, - "loss": 0.6932, - "step": 3651 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8457335866834927e-05, - "loss": 0.7597, - "step": 3652 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8456271403819165e-05, - "loss": 0.9438, - "step": 3653 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.845520660439844e-05, - "loss": 0.8621, - "step": 3654 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8454141468615112e-05, - "loss": 0.8823, - "step": 3655 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8453075996511555e-05, - "loss": 0.8207, - "step": 3656 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.845201018813015e-05, - "loss": 0.7781, - "step": 3657 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8450944043513302e-05, - "loss": 0.7353, - "step": 3658 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8449877562703426e-05, - "loss": 0.8141, - "step": 3659 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.844881074574294e-05, - "loss": 0.8975, - "step": 3660 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8447743592674293e-05, - "loss": 0.8807, - "step": 3661 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8446676103539934e-05, - "loss": 0.9286, - "step": 3662 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.844560827838233e-05, - "loss": 0.8037, - "step": 3663 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8444540117243958e-05, - "loss": 0.9266, - "step": 3664 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8443471620167313e-05, - "loss": 0.8659, - "step": 3665 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8442402787194905e-05, - "loss": 0.8077, - "step": 3666 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8441333618369247e-05, - "loss": 0.7205, - "step": 3667 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8440264113732884e-05, - "loss": 0.7863, - "step": 3668 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8439194273328352e-05, - "loss": 0.7499, - "step": 3669 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8438124097198214e-05, - "loss": 0.8675, - "step": 3670 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8437053585385044e-05, - "loss": 0.9359, - "step": 3671 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.843598273793143e-05, - "loss": 0.7965, - "step": 3672 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.843491155487997e-05, - "loss": 0.8549, - "step": 3673 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.843384003627328e-05, - "loss": 0.8246, - "step": 3674 - }, - { - "epoch": 0.66, - "grad_norm": 0.0, - "learning_rate": 1.8432768182153985e-05, - "loss": 0.7047, - "step": 3675 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8431695992564725e-05, - "loss": 0.6729, - "step": 3676 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8430623467548156e-05, - "loss": 0.9467, - "step": 3677 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8429550607146942e-05, - "loss": 0.9179, - "step": 3678 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8428477411403762e-05, - "loss": 0.8325, - "step": 3679 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8427403880361313e-05, - "loss": 0.7763, - "step": 3680 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.84263300140623e-05, - "loss": 0.8365, - "step": 3681 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8425255812549448e-05, - "loss": 0.7609, - "step": 3682 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8424181275865482e-05, - "loss": 0.7467, - "step": 3683 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8423106404053154e-05, - "loss": 0.7524, - "step": 3684 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8422031197155225e-05, - "loss": 0.7499, - "step": 3685 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8420955655214468e-05, - "loss": 0.9271, - "step": 3686 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8419879778273668e-05, - "loss": 0.8369, - "step": 3687 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8418803566375626e-05, - "loss": 0.9618, - "step": 3688 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8417727019563156e-05, - "loss": 0.8591, - "step": 3689 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8416650137879087e-05, - "loss": 0.8456, - "step": 3690 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8415572921366255e-05, - "loss": 0.8056, - "step": 3691 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8414495370067517e-05, - "loss": 0.7621, - "step": 3692 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8413417484025738e-05, - "loss": 0.8599, - "step": 3693 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8412339263283793e-05, - "loss": 0.8271, - "step": 3694 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.841126070788459e-05, - "loss": 0.7874, - "step": 3695 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.841018181787102e-05, - "loss": 0.8749, - "step": 3696 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8409102593286012e-05, - "loss": 0.7552, - "step": 3697 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8408023034172498e-05, - "loss": 0.8004, - "step": 3698 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.840694314057342e-05, - "loss": 0.7353, - "step": 3699 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8405862912531747e-05, - "loss": 0.756, - "step": 3700 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8404782350090448e-05, - "loss": 0.7936, - "step": 3701 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8403701453292505e-05, - "loss": 0.8703, - "step": 3702 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8402620222180923e-05, - "loss": 0.8162, - "step": 3703 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8401538656798713e-05, - "loss": 0.6704, - "step": 3704 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.840045675718891e-05, - "loss": 0.8784, - "step": 3705 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8399374523394534e-05, - "loss": 0.828, - "step": 3706 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.839829195545866e-05, - "loss": 0.7543, - "step": 3707 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8397209053424342e-05, - "loss": 0.9146, - "step": 3708 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.839612581733466e-05, - "loss": 0.753, - "step": 3709 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8395042247232716e-05, - "loss": 0.8327, - "step": 3710 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8393958343161604e-05, - "loss": 0.7588, - "step": 3711 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.839287410516445e-05, - "loss": 0.6707, - "step": 3712 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.839178953328439e-05, - "loss": 0.8285, - "step": 3713 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8390704627564563e-05, - "loss": 0.8632, - "step": 3714 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8389619388048134e-05, - "loss": 0.76, - "step": 3715 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.838853381477827e-05, - "loss": 1.0171, - "step": 3716 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8387447907798167e-05, - "loss": 0.6757, - "step": 3717 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.838636166715101e-05, - "loss": 0.7889, - "step": 3718 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8385275092880026e-05, - "loss": 0.7635, - "step": 3719 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.838418818502843e-05, - "loss": 0.7097, - "step": 3720 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8383100943639467e-05, - "loss": 0.8075, - "step": 3721 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.838201336875639e-05, - "loss": 0.8068, - "step": 3722 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8380925460422455e-05, - "loss": 0.8083, - "step": 3723 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.837983721868095e-05, - "loss": 0.7765, - "step": 3724 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8378748643575168e-05, - "loss": 0.9251, - "step": 3725 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.837765973514841e-05, - "loss": 0.7555, - "step": 3726 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8376570493443994e-05, - "loss": 0.7619, - "step": 3727 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.837548091850526e-05, - "loss": 0.7966, - "step": 3728 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.837439101037554e-05, - "loss": 0.7831, - "step": 3729 - }, - { - "epoch": 0.67, - "grad_norm": 0.0, - "learning_rate": 1.8373300769098204e-05, - "loss": 0.8238, - "step": 3730 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.837221019471662e-05, - "loss": 0.7826, - "step": 3731 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8371119287274165e-05, - "loss": 0.865, - "step": 3732 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.837002804681425e-05, - "loss": 0.8144, - "step": 3733 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8368936473380278e-05, - "loss": 0.8427, - "step": 3734 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.836784456701568e-05, - "loss": 0.8272, - "step": 3735 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8366752327763885e-05, - "loss": 0.7392, - "step": 3736 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8365659755668356e-05, - "loss": 0.8714, - "step": 3737 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8364566850772544e-05, - "loss": 0.7649, - "step": 3738 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8363473613119938e-05, - "loss": 0.7939, - "step": 3739 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8362380042754023e-05, - "loss": 0.8181, - "step": 3740 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8361286139718303e-05, - "loss": 0.8993, - "step": 3741 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.83601919040563e-05, - "loss": 0.9542, - "step": 3742 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835909733581154e-05, - "loss": 0.7329, - "step": 3743 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8358002435027565e-05, - "loss": 0.8587, - "step": 3744 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8356907201747937e-05, - "loss": 0.8464, - "step": 3745 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835581163601622e-05, - "loss": 0.7909, - "step": 3746 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835471573787601e-05, - "loss": 0.8858, - "step": 3747 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835361950737089e-05, - "loss": 0.8708, - "step": 3748 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835252294454448e-05, - "loss": 0.7044, - "step": 3749 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.835142604944039e-05, - "loss": 0.9435, - "step": 3750 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8350328822102277e-05, - "loss": 0.8194, - "step": 3751 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.834923126257377e-05, - "loss": 0.7921, - "step": 3752 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8348133370898545e-05, - "loss": 0.73, - "step": 3753 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.834703514712027e-05, - "loss": 0.9427, - "step": 3754 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8345936591282637e-05, - "loss": 0.9695, - "step": 3755 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8344837703429352e-05, - "loss": 0.7534, - "step": 3756 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8343738483604126e-05, - "loss": 0.8986, - "step": 3757 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.834263893185069e-05, - "loss": 0.9069, - "step": 3758 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8341539048212787e-05, - "loss": 0.7843, - "step": 3759 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.834043883273417e-05, - "loss": 0.7787, - "step": 3760 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8339338285458607e-05, - "loss": 0.8887, - "step": 3761 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.833823740642988e-05, - "loss": 0.8691, - "step": 3762 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8337136195691786e-05, - "loss": 0.8289, - "step": 3763 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.833603465328813e-05, - "loss": 0.7305, - "step": 3764 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.833493277926274e-05, - "loss": 0.7038, - "step": 3765 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8333830573659443e-05, - "loss": 0.8085, - "step": 3766 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8332728036522087e-05, - "loss": 0.7436, - "step": 3767 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8331625167894538e-05, - "loss": 0.7476, - "step": 3768 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8330521967820666e-05, - "loss": 0.8336, - "step": 3769 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8329418436344358e-05, - "loss": 0.7681, - "step": 3770 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8328314573509515e-05, - "loss": 0.7625, - "step": 3771 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8327210379360053e-05, - "loss": 0.7845, - "step": 3772 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.83261058539399e-05, - "loss": 0.8926, - "step": 3773 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8325000997292985e-05, - "loss": 0.8601, - "step": 3774 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8323895809463274e-05, - "loss": 0.7963, - "step": 3775 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8322790290494726e-05, - "loss": 0.7707, - "step": 3776 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8321684440431326e-05, - "loss": 0.8581, - "step": 3777 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8320578259317055e-05, - "loss": 0.8829, - "step": 3778 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8319471747195935e-05, - "loss": 0.8079, - "step": 3779 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8318364904111972e-05, - "loss": 0.9047, - "step": 3780 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8317257730109208e-05, - "loss": 0.7343, - "step": 3781 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.831615022523168e-05, - "loss": 0.8141, - "step": 3782 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.831504238952345e-05, - "loss": 0.8005, - "step": 3783 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.831393422302859e-05, - "loss": 0.77, - "step": 3784 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8312825725791182e-05, - "loss": 0.8798, - "step": 3785 - }, - { - "epoch": 0.68, - "grad_norm": 0.0, - "learning_rate": 1.8311716897855327e-05, - "loss": 0.799, - "step": 3786 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8310607739265135e-05, - "loss": 0.7964, - "step": 3787 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.830949825006473e-05, - "loss": 0.7694, - "step": 3788 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8308388430298253e-05, - "loss": 0.816, - "step": 3789 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8307278280009848e-05, - "loss": 0.8403, - "step": 3790 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.830616779924368e-05, - "loss": 0.7895, - "step": 3791 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.830505698804393e-05, - "loss": 0.8736, - "step": 3792 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.830394584645479e-05, - "loss": 0.8856, - "step": 3793 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8302834374520452e-05, - "loss": 0.781, - "step": 3794 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8301722572285144e-05, - "loss": 0.7538, - "step": 3795 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8300610439793085e-05, - "loss": 0.8526, - "step": 3796 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8299497977088526e-05, - "loss": 0.7952, - "step": 3797 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.829838518421572e-05, - "loss": 0.7567, - "step": 3798 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8297272061218935e-05, - "loss": 0.8414, - "step": 3799 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8296158608142454e-05, - "loss": 0.9369, - "step": 3800 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8295044825030572e-05, - "loss": 0.9763, - "step": 3801 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8293930711927594e-05, - "loss": 0.797, - "step": 3802 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8292816268877845e-05, - "loss": 0.7066, - "step": 3803 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.829170149592566e-05, - "loss": 0.7656, - "step": 3804 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8290586393115383e-05, - "loss": 0.8979, - "step": 3805 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.828947096049138e-05, - "loss": 0.9317, - "step": 3806 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8288355198098017e-05, - "loss": 0.8052, - "step": 3807 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.828723910597969e-05, - "loss": 0.6894, - "step": 3808 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.828612268418079e-05, - "loss": 0.7906, - "step": 3809 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8285005932745735e-05, - "loss": 0.785, - "step": 3810 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8283888851718957e-05, - "loss": 0.9896, - "step": 3811 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8282771441144884e-05, - "loss": 0.8137, - "step": 3812 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8281653701067977e-05, - "loss": 0.9935, - "step": 3813 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8280535631532696e-05, - "loss": 0.8285, - "step": 3814 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8279417232583524e-05, - "loss": 0.9397, - "step": 3815 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.827829850426495e-05, - "loss": 0.8015, - "step": 3816 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.827717944662148e-05, - "loss": 0.8385, - "step": 3817 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.827606005969763e-05, - "loss": 0.9012, - "step": 3818 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8274940343537935e-05, - "loss": 0.8585, - "step": 3819 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.827382029818694e-05, - "loss": 0.8891, - "step": 3820 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8272699923689195e-05, - "loss": 0.7863, - "step": 3821 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8271579220089276e-05, - "loss": 0.7147, - "step": 3822 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.827045818743177e-05, - "loss": 0.9586, - "step": 3823 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8269336825761266e-05, - "loss": 0.8762, - "step": 3824 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8268215135122375e-05, - "loss": 0.8824, - "step": 3825 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8267093115559723e-05, - "loss": 0.8988, - "step": 3826 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.826597076711795e-05, - "loss": 0.8374, - "step": 3827 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8264848089841694e-05, - "loss": 0.826, - "step": 3828 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8263725083775622e-05, - "loss": 0.831, - "step": 3829 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.826260174896441e-05, - "loss": 0.7296, - "step": 3830 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8261478085452747e-05, - "loss": 0.8608, - "step": 3831 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8260354093285334e-05, - "loss": 0.683, - "step": 3832 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8259229772506884e-05, - "loss": 0.9117, - "step": 3833 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8258105123162125e-05, - "loss": 0.8296, - "step": 3834 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8256980145295797e-05, - "loss": 0.7857, - "step": 3835 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8255854838952653e-05, - "loss": 1.0103, - "step": 3836 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.825472920417746e-05, - "loss": 0.8255, - "step": 3837 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8253603241014998e-05, - "loss": 0.8546, - "step": 3838 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8252476949510062e-05, - "loss": 0.7631, - "step": 3839 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8251350329707452e-05, - "loss": 0.7538, - "step": 3840 - }, - { - "epoch": 0.69, - "grad_norm": 0.0, - "learning_rate": 1.8250223381651992e-05, - "loss": 0.7985, - "step": 3841 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8249096105388514e-05, - "loss": 0.7971, - "step": 3842 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8247968500961863e-05, - "loss": 0.8364, - "step": 3843 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.824684056841689e-05, - "loss": 0.9696, - "step": 3844 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8245712307798475e-05, - "loss": 0.7744, - "step": 3845 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.82445837191515e-05, - "loss": 0.9123, - "step": 3846 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8243454802520858e-05, - "loss": 0.8087, - "step": 3847 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8242325557951466e-05, - "loss": 0.8682, - "step": 3848 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8241195985488242e-05, - "loss": 0.7882, - "step": 3849 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.824006608517612e-05, - "loss": 0.8639, - "step": 3850 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8238935857060057e-05, - "loss": 0.8433, - "step": 3851 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.823780530118501e-05, - "loss": 0.7809, - "step": 3852 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8236674417595957e-05, - "loss": 0.8614, - "step": 3853 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8235543206337883e-05, - "loss": 0.7203, - "step": 3854 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8234411667455796e-05, - "loss": 0.7134, - "step": 3855 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8233279800994704e-05, - "loss": 0.7426, - "step": 3856 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8232147606999636e-05, - "loss": 0.9316, - "step": 3857 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8231015085515636e-05, - "loss": 0.8537, - "step": 3858 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8229882236587755e-05, - "loss": 0.8724, - "step": 3859 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.822874906026106e-05, - "loss": 0.6627, - "step": 3860 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8227615556580632e-05, - "loss": 0.7359, - "step": 3861 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8226481725591557e-05, - "loss": 0.9728, - "step": 3862 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8225347567338953e-05, - "loss": 0.7786, - "step": 3863 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.822421308186793e-05, - "loss": 0.6702, - "step": 3864 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8223078269223617e-05, - "loss": 0.8174, - "step": 3865 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8221943129451168e-05, - "loss": 0.8357, - "step": 3866 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8220807662595737e-05, - "loss": 0.9901, - "step": 3867 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8219671868702495e-05, - "loss": 0.6841, - "step": 3868 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8218535747816618e-05, - "loss": 0.7619, - "step": 3869 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.821739929998332e-05, - "loss": 0.8077, - "step": 3870 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8216262525247793e-05, - "loss": 0.8783, - "step": 3871 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8215125423655268e-05, - "loss": 0.8529, - "step": 3872 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.821398799525098e-05, - "loss": 0.715, - "step": 3873 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8212850240080186e-05, - "loss": 0.7926, - "step": 3874 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8211712158188133e-05, - "loss": 0.8299, - "step": 3875 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.82105737496201e-05, - "loss": 0.8883, - "step": 3876 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8209435014421384e-05, - "loss": 0.6884, - "step": 3877 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.820829595263728e-05, - "loss": 0.7177, - "step": 3878 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.82071565643131e-05, - "loss": 0.8787, - "step": 3879 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.820601684949417e-05, - "loss": 0.8306, - "step": 3880 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8204876808225835e-05, - "loss": 0.9228, - "step": 3881 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8203736440553442e-05, - "loss": 0.8054, - "step": 3882 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.820259574652236e-05, - "loss": 0.8196, - "step": 3883 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8201454726177967e-05, - "loss": 0.827, - "step": 3884 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8200313379565657e-05, - "loss": 0.7746, - "step": 3885 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.819917170673083e-05, - "loss": 0.6763, - "step": 3886 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.819802970771891e-05, - "loss": 0.8839, - "step": 3887 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8196887382575322e-05, - "loss": 0.6719, - "step": 3888 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8195744731345516e-05, - "loss": 0.7809, - "step": 3889 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.819460175407494e-05, - "loss": 0.825, - "step": 3890 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.819345845080907e-05, - "loss": 0.8399, - "step": 3891 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8192314821593387e-05, - "loss": 0.8515, - "step": 3892 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8191170866473384e-05, - "loss": 0.8575, - "step": 3893 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8190026585494572e-05, - "loss": 0.8258, - "step": 3894 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.8188881978702473e-05, - "loss": 0.8409, - "step": 3895 - }, - { - "epoch": 0.7, - "grad_norm": 0.0, - "learning_rate": 1.818773704614262e-05, - "loss": 0.7446, - "step": 3896 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8186591787860562e-05, - "loss": 0.8579, - "step": 3897 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8185446203901857e-05, - "loss": 0.7402, - "step": 3898 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8184300294312082e-05, - "loss": 0.884, - "step": 3899 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.818315405913682e-05, - "loss": 0.8899, - "step": 3900 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8182007498421666e-05, - "loss": 0.7693, - "step": 3901 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8180860612212244e-05, - "loss": 0.772, - "step": 3902 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.817971340055417e-05, - "loss": 0.9495, - "step": 3903 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8178565863493084e-05, - "loss": 0.8953, - "step": 3904 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8177418001074637e-05, - "loss": 0.7496, - "step": 3905 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8176269813344495e-05, - "loss": 0.7554, - "step": 3906 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.817512130034833e-05, - "loss": 0.9022, - "step": 3907 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8173972462131834e-05, - "loss": 0.8437, - "step": 3908 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8172823298740714e-05, - "loss": 0.8911, - "step": 3909 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8171673810220682e-05, - "loss": 0.8654, - "step": 3910 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8170523996617466e-05, - "loss": 0.6684, - "step": 3911 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8169373857976808e-05, - "loss": 0.8253, - "step": 3912 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8168223394344465e-05, - "loss": 0.8965, - "step": 3913 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.81670726057662e-05, - "loss": 0.6871, - "step": 3914 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8165921492287803e-05, - "loss": 0.7484, - "step": 3915 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.816477005395505e-05, - "loss": 0.7759, - "step": 3916 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8163618290813762e-05, - "loss": 0.8206, - "step": 3917 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8162466202909753e-05, - "loss": 0.7854, - "step": 3918 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8161313790288856e-05, - "loss": 0.8587, - "step": 3919 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8160161052996913e-05, - "loss": 0.7821, - "step": 3920 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8159007991079786e-05, - "loss": 0.7016, - "step": 3921 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.815785460458334e-05, - "loss": 0.8116, - "step": 3922 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.815670089355347e-05, - "loss": 0.7919, - "step": 3923 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8155546858036063e-05, - "loss": 0.8547, - "step": 3924 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8154392498077028e-05, - "loss": 0.7275, - "step": 3925 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.815323781372229e-05, - "loss": 0.7183, - "step": 3926 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8152082805017785e-05, - "loss": 0.8493, - "step": 3927 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8150927472009462e-05, - "loss": 0.759, - "step": 3928 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8149771814743283e-05, - "loss": 0.8176, - "step": 3929 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8148615833265215e-05, - "loss": 0.8688, - "step": 3930 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.814745952762125e-05, - "loss": 0.9787, - "step": 3931 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8146302897857392e-05, - "loss": 0.9852, - "step": 3932 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.814514594401965e-05, - "loss": 0.7663, - "step": 3933 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8143988666154044e-05, - "loss": 0.9107, - "step": 3934 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.814283106430662e-05, - "loss": 0.7439, - "step": 3935 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8141673138523428e-05, - "loss": 0.7615, - "step": 3936 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8140514888850525e-05, - "loss": 0.7672, - "step": 3937 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8139356315334002e-05, - "loss": 0.8224, - "step": 3938 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8138197418019938e-05, - "loss": 0.9663, - "step": 3939 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.813703819695444e-05, - "loss": 0.8466, - "step": 3940 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.813587865218362e-05, - "loss": 0.8415, - "step": 3941 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8134718783753613e-05, - "loss": 0.8081, - "step": 3942 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8133558591710553e-05, - "loss": 0.7709, - "step": 3943 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8132398076100605e-05, - "loss": 0.8604, - "step": 3944 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8131237236969927e-05, - "loss": 0.8449, - "step": 3945 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.81300760743647e-05, - "loss": 0.7471, - "step": 3946 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.812891458833112e-05, - "loss": 0.7494, - "step": 3947 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8127752778915396e-05, - "loss": 0.7587, - "step": 3948 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.812659064616374e-05, - "loss": 0.6228, - "step": 3949 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.8125428190122384e-05, - "loss": 0.7635, - "step": 3950 - }, - { - "epoch": 0.71, - "grad_norm": 0.0, - "learning_rate": 1.812426541083758e-05, - "loss": 0.6944, - "step": 3951 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8123102308355576e-05, - "loss": 0.7237, - "step": 3952 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.812193888272265e-05, - "loss": 0.7991, - "step": 3953 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8120775133985077e-05, - "loss": 0.8834, - "step": 3954 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.811961106218916e-05, - "loss": 0.8295, - "step": 3955 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8118446667381208e-05, - "loss": 0.7545, - "step": 3956 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8117281949607534e-05, - "loss": 0.7807, - "step": 3957 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8116116908914482e-05, - "loss": 0.9043, - "step": 3958 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8114951545348398e-05, - "loss": 0.8882, - "step": 3959 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8113785858955636e-05, - "loss": 0.7208, - "step": 3960 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8112619849782574e-05, - "loss": 0.8568, - "step": 3961 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8111453517875597e-05, - "loss": 0.7508, - "step": 3962 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8110286863281103e-05, - "loss": 0.9131, - "step": 3963 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8109119886045504e-05, - "loss": 0.9382, - "step": 3964 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8107952586215225e-05, - "loss": 0.7788, - "step": 3965 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.81067849638367e-05, - "loss": 0.8684, - "step": 3966 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8105617018956385e-05, - "loss": 0.7596, - "step": 3967 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.810444875162074e-05, - "loss": 0.7018, - "step": 3968 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8103280161876235e-05, - "loss": 0.7171, - "step": 3969 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8102111249769366e-05, - "loss": 0.7311, - "step": 3970 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.810094201534663e-05, - "loss": 0.8517, - "step": 3971 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8099772458654547e-05, - "loss": 0.7684, - "step": 3972 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8098602579739638e-05, - "loss": 0.8461, - "step": 3973 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8097432378648445e-05, - "loss": 0.8562, - "step": 3974 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.809626185542752e-05, - "loss": 0.6729, - "step": 3975 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.809509101012343e-05, - "loss": 0.709, - "step": 3976 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8093919842782748e-05, - "loss": 0.7447, - "step": 3977 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.809274835345207e-05, - "loss": 0.7978, - "step": 3978 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8091576542178e-05, - "loss": 0.757, - "step": 3979 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8090404409007155e-05, - "loss": 0.6712, - "step": 3980 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8089231953986162e-05, - "loss": 0.8324, - "step": 3981 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8088059177161662e-05, - "loss": 0.9445, - "step": 3982 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8086886078580317e-05, - "loss": 0.7233, - "step": 3983 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8085712658288787e-05, - "loss": 0.7692, - "step": 3984 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8084538916333754e-05, - "loss": 0.7182, - "step": 3985 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8083364852761917e-05, - "loss": 0.8902, - "step": 3986 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.808219046761998e-05, - "loss": 0.6922, - "step": 3987 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8081015760954653e-05, - "loss": 0.8398, - "step": 3988 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.807984073281268e-05, - "loss": 0.7105, - "step": 3989 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8078665383240804e-05, - "loss": 0.7735, - "step": 3990 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8077489712285778e-05, - "loss": 0.8728, - "step": 3991 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.807631371999437e-05, - "loss": 0.9565, - "step": 3992 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.807513740641337e-05, - "loss": 1.0249, - "step": 3993 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.807396077158957e-05, - "loss": 0.8463, - "step": 3994 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.807278381556978e-05, - "loss": 0.8494, - "step": 3995 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8071606538400817e-05, - "loss": 0.9669, - "step": 3996 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8070428940129524e-05, - "loss": 0.8557, - "step": 3997 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.806925102080274e-05, - "loss": 0.6916, - "step": 3998 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8068072780467326e-05, - "loss": 0.9229, - "step": 3999 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8066894219170156e-05, - "loss": 0.8582, - "step": 4000 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8065715336958112e-05, - "loss": 0.7936, - "step": 4001 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.80645361338781e-05, - "loss": 0.92, - "step": 4002 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8063356609977018e-05, - "loss": 0.7899, - "step": 4003 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8062176765301802e-05, - "loss": 0.6829, - "step": 4004 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8060996599899383e-05, - "loss": 0.7777, - "step": 4005 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8059816113816708e-05, - "loss": 0.8409, - "step": 4006 - }, - { - "epoch": 0.72, - "grad_norm": 0.0, - "learning_rate": 1.8058635307100742e-05, - "loss": 0.7374, - "step": 4007 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8057454179798456e-05, - "loss": 0.7743, - "step": 4008 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8056272731956842e-05, - "loss": 0.8578, - "step": 4009 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8055090963622898e-05, - "loss": 0.8328, - "step": 4010 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8053908874843636e-05, - "loss": 0.8133, - "step": 4011 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.805272646566608e-05, - "loss": 0.7371, - "step": 4012 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8051543736137272e-05, - "loss": 0.8715, - "step": 4013 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.805036068630426e-05, - "loss": 0.7562, - "step": 4014 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804917731621411e-05, - "loss": 0.762, - "step": 4015 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8047993625913897e-05, - "loss": 0.798, - "step": 4016 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804680961545071e-05, - "loss": 0.7604, - "step": 4017 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804562528487165e-05, - "loss": 0.7681, - "step": 4018 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8044440634223836e-05, - "loss": 0.8293, - "step": 4019 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804325566355439e-05, - "loss": 0.6899, - "step": 4020 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804207037291046e-05, - "loss": 0.706, - "step": 4021 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.804088476233919e-05, - "loss": 0.7795, - "step": 4022 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8039698831887748e-05, - "loss": 0.7442, - "step": 4023 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8038512581603314e-05, - "loss": 0.7083, - "step": 4024 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8037326011533084e-05, - "loss": 0.7209, - "step": 4025 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8036139121724247e-05, - "loss": 0.7799, - "step": 4026 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8034951912224037e-05, - "loss": 0.7881, - "step": 4027 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.803376438307967e-05, - "loss": 0.7279, - "step": 4028 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.80325765343384e-05, - "loss": 0.8075, - "step": 4029 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8031388366047472e-05, - "loss": 0.8935, - "step": 4030 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8030199878254154e-05, - "loss": 0.7101, - "step": 4031 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8029011071005728e-05, - "loss": 0.7208, - "step": 4032 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8027821944349492e-05, - "loss": 0.7533, - "step": 4033 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.802663249833274e-05, - "loss": 0.8231, - "step": 4034 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.80254427330028e-05, - "loss": 0.7996, - "step": 4035 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8024252648407002e-05, - "loss": 0.8665, - "step": 4036 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8023062244592686e-05, - "loss": 0.598, - "step": 4037 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.802187152160721e-05, - "loss": 0.9435, - "step": 4038 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.802068047949794e-05, - "loss": 0.9031, - "step": 4039 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8019489118312263e-05, - "loss": 0.8649, - "step": 4040 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8018297438097568e-05, - "loss": 0.9297, - "step": 4041 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8017105438901266e-05, - "loss": 0.7713, - "step": 4042 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8015913120770778e-05, - "loss": 0.7485, - "step": 4043 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.801472048375353e-05, - "loss": 0.8463, - "step": 4044 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8013527527896974e-05, - "loss": 0.8222, - "step": 4045 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8012334253248567e-05, - "loss": 0.7588, - "step": 4046 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8011140659855773e-05, - "loss": 0.6958, - "step": 4047 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8009946747766084e-05, - "loss": 0.7773, - "step": 4048 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8008752517026988e-05, - "loss": 0.8624, - "step": 4049 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8007557967685997e-05, - "loss": 0.9584, - "step": 4050 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8006363099790635e-05, - "loss": 0.9479, - "step": 4051 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.800516791338843e-05, - "loss": 0.8273, - "step": 4052 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8003972408526934e-05, - "loss": 0.8253, - "step": 4053 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.8002776585253702e-05, - "loss": 0.7401, - "step": 4054 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.800158044361631e-05, - "loss": 0.7532, - "step": 4055 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.800038398366234e-05, - "loss": 0.98, - "step": 4056 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.799918720543939e-05, - "loss": 0.8411, - "step": 4057 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.799799010899507e-05, - "loss": 0.7273, - "step": 4058 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.7996792694377002e-05, - "loss": 0.7874, - "step": 4059 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.799559496163282e-05, - "loss": 0.6826, - "step": 4060 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.7994396910810175e-05, - "loss": 0.7832, - "step": 4061 - }, - { - "epoch": 0.73, - "grad_norm": 0.0, - "learning_rate": 1.7993198541956725e-05, - "loss": 0.79, - "step": 4062 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7991999855120147e-05, - "loss": 1.0009, - "step": 4063 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.799080085034812e-05, - "loss": 0.7683, - "step": 4064 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7989601527688344e-05, - "loss": 0.8676, - "step": 4065 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7988401887188534e-05, - "loss": 0.8528, - "step": 4066 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7987201928896414e-05, - "loss": 0.8286, - "step": 4067 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7986001652859717e-05, - "loss": 0.8922, - "step": 4068 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7984801059126193e-05, - "loss": 0.9343, - "step": 4069 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.798360014774361e-05, - "loss": 0.7746, - "step": 4070 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7982398918759728e-05, - "loss": 0.7937, - "step": 4071 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7981197372222342e-05, - "loss": 0.744, - "step": 4072 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.797999550817926e-05, - "loss": 0.8432, - "step": 4073 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7978793326678282e-05, - "loss": 0.719, - "step": 4074 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7977590827767235e-05, - "loss": 0.9579, - "step": 4075 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7976388011493963e-05, - "loss": 0.848, - "step": 4076 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.797518487790631e-05, - "loss": 0.7671, - "step": 4077 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7973981427052137e-05, - "loss": 0.8591, - "step": 4078 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7972777658979326e-05, - "loss": 0.7721, - "step": 4079 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.797157357373576e-05, - "loss": 0.8433, - "step": 4080 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.797036917136934e-05, - "loss": 0.7751, - "step": 4081 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7969164451927984e-05, - "loss": 0.704, - "step": 4082 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7967959415459614e-05, - "loss": 0.9806, - "step": 4083 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7966754062012163e-05, - "loss": 0.8908, - "step": 4084 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7965548391633595e-05, - "loss": 0.8272, - "step": 4085 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.796434240437186e-05, - "loss": 0.943, - "step": 4086 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7963136100274944e-05, - "loss": 0.9046, - "step": 4087 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.796192947939083e-05, - "loss": 0.8229, - "step": 4088 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7960722541767527e-05, - "loss": 0.6564, - "step": 4089 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.795951528745304e-05, - "loss": 0.7456, - "step": 4090 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7958307716495402e-05, - "loss": 0.8694, - "step": 4091 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7957099828942643e-05, - "loss": 0.872, - "step": 4092 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7955891624842826e-05, - "loss": 0.8098, - "step": 4093 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.795468310424401e-05, - "loss": 0.7892, - "step": 4094 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7953474267194276e-05, - "loss": 0.8739, - "step": 4095 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7952265113741707e-05, - "loss": 0.7966, - "step": 4096 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.795105564393441e-05, - "loss": 0.8813, - "step": 4097 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.79498458578205e-05, - "loss": 0.8208, - "step": 4098 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.79486357554481e-05, - "loss": 0.8629, - "step": 4099 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7947425336865358e-05, - "loss": 0.9155, - "step": 4100 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7946214602120417e-05, - "loss": 0.8308, - "step": 4101 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7945003551261446e-05, - "loss": 0.8847, - "step": 4102 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7943792184336627e-05, - "loss": 0.9085, - "step": 4103 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.794258050139414e-05, - "loss": 0.7379, - "step": 4104 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.79413685024822e-05, - "loss": 0.8501, - "step": 4105 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7940156187649017e-05, - "loss": 0.7374, - "step": 4106 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7938943556942817e-05, - "loss": 0.8841, - "step": 4107 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7937730610411842e-05, - "loss": 0.8531, - "step": 4108 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7936517348104344e-05, - "loss": 0.9081, - "step": 4109 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7935303770068592e-05, - "loss": 0.7172, - "step": 4110 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.793408987635286e-05, - "loss": 0.8166, - "step": 4111 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7932875667005443e-05, - "loss": 0.9613, - "step": 4112 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.793166114207464e-05, - "loss": 0.8429, - "step": 4113 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.793044630160877e-05, - "loss": 0.9456, - "step": 4114 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.7929231145656157e-05, - "loss": 0.7989, - "step": 4115 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.792801567426515e-05, - "loss": 0.8122, - "step": 4116 - }, - { - "epoch": 0.74, - "grad_norm": 0.0, - "learning_rate": 1.79267998874841e-05, - "loss": 0.7132, - "step": 4117 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.792558378536137e-05, - "loss": 0.6908, - "step": 4118 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7924367367945336e-05, - "loss": 0.8929, - "step": 4119 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7923150635284395e-05, - "loss": 0.7991, - "step": 4120 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.792193358742695e-05, - "loss": 0.8148, - "step": 4121 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7920716224421413e-05, - "loss": 0.736, - "step": 4122 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7919498546316215e-05, - "loss": 0.953, - "step": 4123 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7918280553159802e-05, - "loss": 0.8614, - "step": 4124 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7917062245000622e-05, - "loss": 0.9979, - "step": 4125 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7915843621887144e-05, - "loss": 0.7586, - "step": 4126 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7914624683867844e-05, - "loss": 0.7761, - "step": 4127 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7913405430991218e-05, - "loss": 0.7582, - "step": 4128 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7912185863305765e-05, - "loss": 0.7267, - "step": 4129 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7910965980860005e-05, - "loss": 0.7778, - "step": 4130 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7909745783702468e-05, - "loss": 0.8275, - "step": 4131 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.790852527188169e-05, - "loss": 0.7265, - "step": 4132 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7907304445446233e-05, - "loss": 0.7141, - "step": 4133 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7906083304444656e-05, - "loss": 0.8209, - "step": 4134 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7904861848925543e-05, - "loss": 0.7715, - "step": 4135 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7903640078937482e-05, - "loss": 0.8581, - "step": 4136 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.790241799452908e-05, - "loss": 0.8249, - "step": 4137 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.790119559574895e-05, - "loss": 0.666, - "step": 4138 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7899972882645727e-05, - "loss": 0.7601, - "step": 4139 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7898749855268048e-05, - "loss": 0.7833, - "step": 4140 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.789752651366457e-05, - "loss": 0.8177, - "step": 4141 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7896302857883957e-05, - "loss": 0.9399, - "step": 4142 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7895078887974892e-05, - "loss": 0.8346, - "step": 4143 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.789385460398606e-05, - "loss": 0.7626, - "step": 4144 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.789263000596617e-05, - "loss": 0.8738, - "step": 4145 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.789140509396394e-05, - "loss": 0.8214, - "step": 4146 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.789017986802809e-05, - "loss": 0.928, - "step": 4147 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7888954328207376e-05, - "loss": 0.6215, - "step": 4148 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.788772847455054e-05, - "loss": 0.791, - "step": 4149 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.788650230710635e-05, - "loss": 0.8234, - "step": 4150 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7885275825923597e-05, - "loss": 0.8401, - "step": 4151 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7884049031051053e-05, - "loss": 0.7952, - "step": 4152 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.788282192253754e-05, - "loss": 0.8108, - "step": 4153 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.788159450043186e-05, - "loss": 0.873, - "step": 4154 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7880366764782856e-05, - "loss": 0.8383, - "step": 4155 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.787913871563936e-05, - "loss": 0.7195, - "step": 4156 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.787791035305023e-05, - "loss": 0.7828, - "step": 4157 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7876681677064323e-05, - "loss": 0.7877, - "step": 4158 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7875452687730533e-05, - "loss": 0.698, - "step": 4159 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.787422338509774e-05, - "loss": 0.8184, - "step": 4160 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7872993769214852e-05, - "loss": 0.8211, - "step": 4161 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7871763840130787e-05, - "loss": 0.9168, - "step": 4162 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.787053359789447e-05, - "loss": 0.8049, - "step": 4163 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7869303042554844e-05, - "loss": 0.6758, - "step": 4164 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.786807217416086e-05, - "loss": 0.7729, - "step": 4165 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.786684099276149e-05, - "loss": 0.7581, - "step": 4166 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7865609498405704e-05, - "loss": 0.9146, - "step": 4167 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.78643776911425e-05, - "loss": 0.7396, - "step": 4168 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7863145571020876e-05, - "loss": 0.7237, - "step": 4169 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.786191313808986e-05, - "loss": 0.8286, - "step": 4170 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.786068039239846e-05, - "loss": 0.9682, - "step": 4171 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7859447333995736e-05, - "loss": 0.9094, - "step": 4172 - }, - { - "epoch": 0.75, - "grad_norm": 0.0, - "learning_rate": 1.7858213962930727e-05, - "loss": 0.9053, - "step": 4173 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7856980279252506e-05, - "loss": 0.7386, - "step": 4174 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7855746283010152e-05, - "loss": 0.796, - "step": 4175 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.785451197425275e-05, - "loss": 0.9152, - "step": 4176 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.785327735302941e-05, - "loss": 0.8336, - "step": 4177 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7852042419389244e-05, - "loss": 0.7778, - "step": 4178 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7850807173381377e-05, - "loss": 0.8204, - "step": 4179 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7849571615054953e-05, - "loss": 0.7193, - "step": 4180 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.784833574445912e-05, - "loss": 0.9405, - "step": 4181 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.784709956164305e-05, - "loss": 0.9903, - "step": 4182 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7845863066655913e-05, - "loss": 0.7567, - "step": 4183 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7844626259546904e-05, - "loss": 0.8946, - "step": 4184 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.784338914036522e-05, - "loss": 0.8216, - "step": 4185 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7842151709160086e-05, - "loss": 0.8217, - "step": 4186 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.784091396598072e-05, - "loss": 0.7394, - "step": 4187 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7839675910876365e-05, - "loss": 0.7357, - "step": 4188 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.783843754389627e-05, - "loss": 0.8388, - "step": 4189 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7837198865089705e-05, - "loss": 0.8661, - "step": 4190 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.783595987450594e-05, - "loss": 0.7652, - "step": 4191 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7834720572194268e-05, - "loss": 0.8131, - "step": 4192 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.783348095820399e-05, - "loss": 0.7449, - "step": 4193 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.783224103258442e-05, - "loss": 0.9406, - "step": 4194 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.783100079538489e-05, - "loss": 0.7239, - "step": 4195 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7829760246654728e-05, - "loss": 0.9898, - "step": 4196 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7828519386443287e-05, - "loss": 0.7846, - "step": 4197 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.782727821479994e-05, - "loss": 0.8109, - "step": 4198 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7826036731774054e-05, - "loss": 0.8896, - "step": 4199 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7824794937415018e-05, - "loss": 0.837, - "step": 4200 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7823552831772234e-05, - "loss": 0.7843, - "step": 4201 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.782231041489512e-05, - "loss": 0.8644, - "step": 4202 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.78210676868331e-05, - "loss": 0.7543, - "step": 4203 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7819824647635602e-05, - "loss": 0.894, - "step": 4204 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7818581297352086e-05, - "loss": 0.9104, - "step": 4205 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7817337636032013e-05, - "loss": 0.8154, - "step": 4206 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7816093663724858e-05, - "loss": 0.8261, - "step": 4207 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7814849380480103e-05, - "loss": 0.9601, - "step": 4208 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7813604786347257e-05, - "loss": 0.8721, - "step": 4209 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7812359881375824e-05, - "loss": 0.7375, - "step": 4210 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7811114665615334e-05, - "loss": 0.7495, - "step": 4211 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7809869139115318e-05, - "loss": 0.7394, - "step": 4212 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7808623301925335e-05, - "loss": 0.7772, - "step": 4213 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7807377154094933e-05, - "loss": 0.8106, - "step": 4214 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7806130695673697e-05, - "loss": 0.8462, - "step": 4215 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.780488392671121e-05, - "loss": 0.8075, - "step": 4216 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7803636847257067e-05, - "loss": 0.8175, - "step": 4217 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7802389457360882e-05, - "loss": 0.7901, - "step": 4218 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.780114175707228e-05, - "loss": 0.7687, - "step": 4219 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7799893746440893e-05, - "loss": 0.8388, - "step": 4220 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7798645425516368e-05, - "loss": 0.7656, - "step": 4221 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7797396794348375e-05, - "loss": 0.8849, - "step": 4222 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7796147852986575e-05, - "loss": 0.7493, - "step": 4223 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.779489860148066e-05, - "loss": 0.9142, - "step": 4224 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7793649039880326e-05, - "loss": 0.8058, - "step": 4225 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.779239916823528e-05, - "loss": 0.8206, - "step": 4226 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.7791148986595247e-05, - "loss": 0.8765, - "step": 4227 - }, - { - "epoch": 0.76, - "grad_norm": 0.0, - "learning_rate": 1.778989849500996e-05, - "loss": 0.8558, - "step": 4228 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7788647693529167e-05, - "loss": 0.8212, - "step": 4229 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7787396582202625e-05, - "loss": 0.8081, - "step": 4230 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7786145161080105e-05, - "loss": 0.7308, - "step": 4231 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7784893430211396e-05, - "loss": 0.749, - "step": 4232 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7783641389646288e-05, - "loss": 0.8375, - "step": 4233 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.778238903943459e-05, - "loss": 0.7449, - "step": 4234 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7781136379626127e-05, - "loss": 0.8581, - "step": 4235 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7779883410270727e-05, - "loss": 0.7615, - "step": 4236 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7778630131418237e-05, - "loss": 0.7699, - "step": 4237 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7777376543118513e-05, - "loss": 0.8935, - "step": 4238 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.777612264542143e-05, - "loss": 0.8331, - "step": 4239 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7774868438376868e-05, - "loss": 0.7752, - "step": 4240 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7773613922034717e-05, - "loss": 0.8025, - "step": 4241 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.777235909644489e-05, - "loss": 0.7859, - "step": 4242 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.77711039616573e-05, - "loss": 0.9149, - "step": 4243 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7769848517721884e-05, - "loss": 0.7938, - "step": 4244 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.776859276468858e-05, - "loss": 0.8201, - "step": 4245 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7767336702607352e-05, - "loss": 0.8977, - "step": 4246 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7766080331528165e-05, - "loss": 0.9321, - "step": 4247 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7764823651500995e-05, - "loss": 0.8271, - "step": 4248 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7763566662575837e-05, - "loss": 0.8226, - "step": 4249 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.77623093648027e-05, - "loss": 0.6959, - "step": 4250 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.77610517582316e-05, - "loss": 0.7608, - "step": 4251 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7759793842912562e-05, - "loss": 0.8401, - "step": 4252 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7758535618895636e-05, - "loss": 0.9722, - "step": 4253 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.775727708623087e-05, - "loss": 0.9489, - "step": 4254 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7756018244968332e-05, - "loss": 0.673, - "step": 4255 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.77547590951581e-05, - "loss": 0.81, - "step": 4256 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.775349963685027e-05, - "loss": 0.7353, - "step": 4257 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.775223987009494e-05, - "loss": 0.8152, - "step": 4258 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7750979794942226e-05, - "loss": 0.7184, - "step": 4259 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7749719411442262e-05, - "loss": 0.7209, - "step": 4260 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7748458719645182e-05, - "loss": 0.9059, - "step": 4261 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7747197719601137e-05, - "loss": 0.7073, - "step": 4262 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7745936411360295e-05, - "loss": 0.9188, - "step": 4263 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7744674794972837e-05, - "loss": 0.7395, - "step": 4264 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7743412870488946e-05, - "loss": 0.7545, - "step": 4265 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7742150637958827e-05, - "loss": 0.7289, - "step": 4266 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7740888097432694e-05, - "loss": 0.828, - "step": 4267 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.773962524896077e-05, - "loss": 0.7737, - "step": 4268 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7738362092593297e-05, - "loss": 0.8468, - "step": 4269 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7737098628380517e-05, - "loss": 0.6663, - "step": 4270 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7735834856372705e-05, - "loss": 0.8744, - "step": 4271 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.773457077662013e-05, - "loss": 0.9051, - "step": 4272 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7733306389173077e-05, - "loss": 0.6221, - "step": 4273 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7732041694081848e-05, - "loss": 0.8102, - "step": 4274 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7730776691396756e-05, - "loss": 0.8143, - "step": 4275 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7729511381168123e-05, - "loss": 0.8071, - "step": 4276 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7728245763446286e-05, - "loss": 0.8034, - "step": 4277 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7726979838281593e-05, - "loss": 0.6991, - "step": 4278 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7725713605724405e-05, - "loss": 0.8582, - "step": 4279 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7724447065825097e-05, - "loss": 0.7608, - "step": 4280 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.772318021863405e-05, - "loss": 0.8386, - "step": 4281 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.7721913064201665e-05, - "loss": 0.8742, - "step": 4282 - }, - { - "epoch": 0.77, - "grad_norm": 0.0, - "learning_rate": 1.772064560257835e-05, - "loss": 0.7782, - "step": 4283 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7719377833814523e-05, - "loss": 0.8432, - "step": 4284 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7718109757960623e-05, - "loss": 0.697, - "step": 4285 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.77168413750671e-05, - "loss": 0.9278, - "step": 4286 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7715572685184403e-05, - "loss": 0.7806, - "step": 4287 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7714303688363008e-05, - "loss": 0.8132, - "step": 4288 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7713034384653393e-05, - "loss": 0.8769, - "step": 4289 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.771176477410606e-05, - "loss": 0.847, - "step": 4290 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7710494856771515e-05, - "loss": 0.8522, - "step": 4291 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7709224632700275e-05, - "loss": 0.7704, - "step": 4292 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7707954101942873e-05, - "loss": 0.7947, - "step": 4293 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.770668326454985e-05, - "loss": 0.6719, - "step": 4294 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7705412120571767e-05, - "loss": 0.706, - "step": 4295 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7704140670059187e-05, - "loss": 0.8905, - "step": 4296 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7702868913062698e-05, - "loss": 0.8643, - "step": 4297 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7701596849632883e-05, - "loss": 0.7606, - "step": 4298 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7700324479820352e-05, - "loss": 0.8289, - "step": 4299 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7699051803675723e-05, - "loss": 0.8046, - "step": 4300 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7697778821249625e-05, - "loss": 0.7697, - "step": 4301 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.76965055325927e-05, - "loss": 0.7237, - "step": 4302 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7695231937755598e-05, - "loss": 0.8464, - "step": 4303 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7693958036788984e-05, - "loss": 0.791, - "step": 4304 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7692683829743546e-05, - "loss": 0.7318, - "step": 4305 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7691409316669958e-05, - "loss": 0.7984, - "step": 4306 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.769013449761894e-05, - "loss": 0.8052, - "step": 4307 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.768885937264119e-05, - "loss": 0.8957, - "step": 4308 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7687583941787447e-05, - "loss": 0.7947, - "step": 4309 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.768630820510844e-05, - "loss": 0.8397, - "step": 4310 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7685032162654932e-05, - "loss": 0.7003, - "step": 4311 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.768375581447768e-05, - "loss": 0.791, - "step": 4312 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7682479160627454e-05, - "loss": 0.7728, - "step": 4313 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7681202201155046e-05, - "loss": 0.7785, - "step": 4314 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7679924936111258e-05, - "loss": 0.7988, - "step": 4315 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.76786473655469e-05, - "loss": 0.7644, - "step": 4316 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7677369489512795e-05, - "loss": 0.96, - "step": 4317 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7676091308059778e-05, - "loss": 0.8784, - "step": 4318 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7674812821238702e-05, - "loss": 0.8243, - "step": 4319 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7673534029100422e-05, - "loss": 0.7897, - "step": 4320 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7672254931695812e-05, - "loss": 0.7982, - "step": 4321 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.767097552907576e-05, - "loss": 0.7672, - "step": 4322 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7669695821291156e-05, - "loss": 0.87, - "step": 4323 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7668415808392916e-05, - "loss": 0.7624, - "step": 4324 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7667135490431963e-05, - "loss": 0.8183, - "step": 4325 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.766585486745922e-05, - "loss": 0.6808, - "step": 4326 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.766457393952564e-05, - "loss": 0.8963, - "step": 4327 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.766329270668218e-05, - "loss": 0.6923, - "step": 4328 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7662011168979805e-05, - "loss": 0.8409, - "step": 4329 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.76607293264695e-05, - "loss": 0.8064, - "step": 4330 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.765944717920226e-05, - "loss": 0.797, - "step": 4331 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.765816472722909e-05, - "loss": 0.7193, - "step": 4332 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.765688197060101e-05, - "loss": 0.6873, - "step": 4333 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7655598909369045e-05, - "loss": 0.8708, - "step": 4334 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7654315543584243e-05, - "loss": 0.7728, - "step": 4335 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7653031873297653e-05, - "loss": 0.8451, - "step": 4336 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7651747898560344e-05, - "loss": 0.7928, - "step": 4337 - }, - { - "epoch": 0.78, - "grad_norm": 0.0, - "learning_rate": 1.7650463619423402e-05, - "loss": 0.909, - "step": 4338 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7649179035937907e-05, - "loss": 0.7421, - "step": 4339 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7647894148154968e-05, - "loss": 0.8206, - "step": 4340 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7646608956125695e-05, - "loss": 0.8426, - "step": 4341 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.764532345990122e-05, - "loss": 1.0155, - "step": 4342 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.764403765953268e-05, - "loss": 0.8228, - "step": 4343 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.764275155507123e-05, - "loss": 0.9214, - "step": 4344 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7641465146568025e-05, - "loss": 0.779, - "step": 4345 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.764017843407425e-05, - "loss": 0.8175, - "step": 4346 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7638891417641087e-05, - "loss": 0.8746, - "step": 4347 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7637604097319738e-05, - "loss": 0.7952, - "step": 4348 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7636316473161413e-05, - "loss": 0.8339, - "step": 4349 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7635028545217337e-05, - "loss": 0.7428, - "step": 4350 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.763374031353875e-05, - "loss": 0.8928, - "step": 4351 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.763245177817689e-05, - "loss": 0.8346, - "step": 4352 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7631162939183025e-05, - "loss": 0.7805, - "step": 4353 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7629873796608427e-05, - "loss": 0.9414, - "step": 4354 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7628584350504377e-05, - "loss": 0.7546, - "step": 4355 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7627294600922175e-05, - "loss": 0.8311, - "step": 4356 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7626004547913126e-05, - "loss": 0.7678, - "step": 4357 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7624714191528555e-05, - "loss": 0.7523, - "step": 4358 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7623423531819788e-05, - "loss": 0.6121, - "step": 4359 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7622132568838172e-05, - "loss": 0.7492, - "step": 4360 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7620841302635068e-05, - "loss": 0.829, - "step": 4361 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.761954973326184e-05, - "loss": 0.7893, - "step": 4362 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7618257860769873e-05, - "loss": 0.8406, - "step": 4363 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7616965685210554e-05, - "loss": 0.773, - "step": 4364 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7615673206635294e-05, - "loss": 0.7339, - "step": 4365 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7614380425095506e-05, - "loss": 0.7721, - "step": 4366 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.761308734064262e-05, - "loss": 0.7769, - "step": 4367 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.761179395332808e-05, - "loss": 0.8179, - "step": 4368 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.761050026320334e-05, - "loss": 0.9408, - "step": 4369 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7609206270319856e-05, - "loss": 0.8555, - "step": 4370 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.760791197472911e-05, - "loss": 0.8093, - "step": 4371 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.76066173764826e-05, - "loss": 0.6882, - "step": 4372 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7605322475631815e-05, - "loss": 0.8392, - "step": 4373 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7604027272228275e-05, - "loss": 0.8371, - "step": 4374 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7602731766323502e-05, - "loss": 0.7024, - "step": 4375 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7601435957969035e-05, - "loss": 0.6892, - "step": 4376 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7600139847216426e-05, - "loss": 0.6865, - "step": 4377 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7598843434117233e-05, - "loss": 0.9235, - "step": 4378 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.759754671872303e-05, - "loss": 0.8678, - "step": 4379 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7596249701085403e-05, - "loss": 0.8369, - "step": 4380 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.759495238125595e-05, - "loss": 0.6889, - "step": 4381 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.759365475928628e-05, - "loss": 0.7405, - "step": 4382 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7592356835228016e-05, - "loss": 0.9101, - "step": 4383 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.759105860913279e-05, - "loss": 0.7779, - "step": 4384 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7589760081052247e-05, - "loss": 1.0009, - "step": 4385 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.758846125103805e-05, - "loss": 0.7194, - "step": 4386 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.758716211914186e-05, - "loss": 0.7315, - "step": 4387 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7585862685415368e-05, - "loss": 0.8237, - "step": 4388 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.758456294991026e-05, - "loss": 0.8708, - "step": 4389 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7583262912678246e-05, - "loss": 0.8822, - "step": 4390 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.758196257377104e-05, - "loss": 0.8441, - "step": 4391 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.758066193324038e-05, - "loss": 0.8206, - "step": 4392 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.7579360991137995e-05, - "loss": 0.8265, - "step": 4393 - }, - { - "epoch": 0.79, - "grad_norm": 0.0, - "learning_rate": 1.757805974751565e-05, - "loss": 0.7681, - "step": 4394 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7576758202425105e-05, - "loss": 0.8624, - "step": 4395 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.757545635591814e-05, - "loss": 0.7359, - "step": 4396 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.757415420804654e-05, - "loss": 0.6814, - "step": 4397 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7572851758862115e-05, - "loss": 0.81, - "step": 4398 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7571549008416672e-05, - "loss": 0.6577, - "step": 4399 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.757024595676204e-05, - "loss": 0.7308, - "step": 4400 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7568942603950052e-05, - "loss": 0.7314, - "step": 4401 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7567638950032565e-05, - "loss": 0.8661, - "step": 4402 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7566334995061433e-05, - "loss": 0.7904, - "step": 4403 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7565030739088533e-05, - "loss": 0.8619, - "step": 4404 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7563726182165752e-05, - "loss": 0.7919, - "step": 4405 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.756242132434499e-05, - "loss": 0.794, - "step": 4406 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7561116165678144e-05, - "loss": 0.7434, - "step": 4407 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.755981070621715e-05, - "loss": 0.8992, - "step": 4408 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7558504946013933e-05, - "loss": 0.7688, - "step": 4409 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7557198885120444e-05, - "loss": 0.8015, - "step": 4410 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7555892523588636e-05, - "loss": 0.7813, - "step": 4411 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.755458586147048e-05, - "loss": 0.8504, - "step": 4412 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7553278898817953e-05, - "loss": 0.8806, - "step": 4413 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7551971635683053e-05, - "loss": 0.9177, - "step": 4414 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7550664072117787e-05, - "loss": 0.8658, - "step": 4415 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7549356208174167e-05, - "loss": 0.7894, - "step": 4416 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7548048043904225e-05, - "loss": 0.7819, - "step": 4417 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7546739579359998e-05, - "loss": 0.7486, - "step": 4418 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.754543081459355e-05, - "loss": 0.8153, - "step": 4419 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.754412174965693e-05, - "loss": 0.6482, - "step": 4420 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.754281238460223e-05, - "loss": 0.741, - "step": 4421 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7541502719481525e-05, - "loss": 0.7822, - "step": 4422 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7540192754346926e-05, - "loss": 0.8537, - "step": 4423 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7538882489250543e-05, - "loss": 0.7922, - "step": 4424 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.75375719242445e-05, - "loss": 0.8186, - "step": 4425 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.753626105938093e-05, - "loss": 0.7105, - "step": 4426 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.753494989471199e-05, - "loss": 0.8374, - "step": 4427 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.753363843028983e-05, - "loss": 0.8736, - "step": 4428 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7532326666166628e-05, - "loss": 0.8019, - "step": 4429 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.753101460239457e-05, - "loss": 0.7842, - "step": 4430 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7529702239025848e-05, - "loss": 0.7805, - "step": 4431 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.752838957611267e-05, - "loss": 0.676, - "step": 4432 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7527076613707264e-05, - "loss": 0.8499, - "step": 4433 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.752576335186185e-05, - "loss": 0.8277, - "step": 4434 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.752444979062868e-05, - "loss": 0.803, - "step": 4435 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7523135930060005e-05, - "loss": 0.7783, - "step": 4436 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.75218217702081e-05, - "loss": 0.969, - "step": 4437 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7520507311125236e-05, - "loss": 0.8081, - "step": 4438 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7519192552863705e-05, - "loss": 0.7169, - "step": 4439 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7517877495475816e-05, - "loss": 0.773, - "step": 4440 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7516562139013882e-05, - "loss": 0.768, - "step": 4441 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.751524648353023e-05, - "loss": 0.8181, - "step": 4442 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7513930529077196e-05, - "loss": 0.9065, - "step": 4443 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7512614275707135e-05, - "loss": 0.7489, - "step": 4444 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.751129772347241e-05, - "loss": 0.8782, - "step": 4445 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.750998087242539e-05, - "loss": 0.9018, - "step": 4446 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.7508663722618467e-05, - "loss": 0.8459, - "step": 4447 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.750734627410404e-05, - "loss": 0.7542, - "step": 4448 - }, - { - "epoch": 0.8, - "grad_norm": 0.0, - "learning_rate": 1.750602852693452e-05, - "loss": 0.7106, - "step": 4449 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.750471048116232e-05, - "loss": 0.7391, - "step": 4450 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7503392136839884e-05, - "loss": 0.8599, - "step": 4451 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7502073494019653e-05, - "loss": 0.8487, - "step": 4452 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.750075455275409e-05, - "loss": 0.7905, - "step": 4453 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.749943531309566e-05, - "loss": 0.9055, - "step": 4454 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7498115775096843e-05, - "loss": 0.7695, - "step": 4455 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7496795938810137e-05, - "loss": 0.8451, - "step": 4456 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.749547580428805e-05, - "loss": 0.7231, - "step": 4457 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.749415537158309e-05, - "loss": 0.9235, - "step": 4458 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7492834640747793e-05, - "loss": 0.9177, - "step": 4459 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.74915136118347e-05, - "loss": 0.9131, - "step": 4460 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.749019228489636e-05, - "loss": 1.0062, - "step": 4461 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7488870659985337e-05, - "loss": 0.7959, - "step": 4462 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7487548737154213e-05, - "loss": 0.8281, - "step": 4463 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7486226516455575e-05, - "loss": 0.6937, - "step": 4464 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.748490399794202e-05, - "loss": 0.7735, - "step": 4465 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.748358118166616e-05, - "loss": 0.8189, - "step": 4466 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.748225806768062e-05, - "loss": 1.095, - "step": 4467 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7480934656038036e-05, - "loss": 0.7334, - "step": 4468 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.747961094679106e-05, - "loss": 0.8436, - "step": 4469 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.747828693999234e-05, - "loss": 0.8179, - "step": 4470 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7476962635694563e-05, - "loss": 0.7901, - "step": 4471 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.74756380339504e-05, - "loss": 0.8947, - "step": 4472 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7474313134812546e-05, - "loss": 0.8584, - "step": 4473 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7472987938333715e-05, - "loss": 0.8711, - "step": 4474 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.747166244456662e-05, - "loss": 0.7847, - "step": 4475 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7470336653563998e-05, - "loss": 0.8147, - "step": 4476 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7469010565378584e-05, - "loss": 0.8571, - "step": 4477 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7467684180063133e-05, - "loss": 0.7767, - "step": 4478 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7466357497670415e-05, - "loss": 0.8374, - "step": 4479 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.74650305182532e-05, - "loss": 0.6047, - "step": 4480 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7463703241864287e-05, - "loss": 0.7817, - "step": 4481 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7462375668556474e-05, - "loss": 0.9186, - "step": 4482 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.746104779838257e-05, - "loss": 0.7402, - "step": 4483 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7459719631395405e-05, - "loss": 0.7865, - "step": 4484 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.745839116764781e-05, - "loss": 0.7051, - "step": 4485 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7457062407192643e-05, - "loss": 0.9639, - "step": 4486 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.745573335008276e-05, - "loss": 0.7621, - "step": 4487 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7454403996371024e-05, - "loss": 0.7903, - "step": 4488 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7453074346110333e-05, - "loss": 0.7411, - "step": 4489 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7451744399353576e-05, - "loss": 0.8626, - "step": 4490 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7450414156153658e-05, - "loss": 0.6652, - "step": 4491 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7449083616563504e-05, - "loss": 0.8555, - "step": 4492 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7447752780636045e-05, - "loss": 0.9285, - "step": 4493 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.744642164842422e-05, - "loss": 0.8186, - "step": 4494 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7445090219980985e-05, - "loss": 0.7004, - "step": 4495 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7443758495359303e-05, - "loss": 0.7178, - "step": 4496 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7442426474612163e-05, - "loss": 0.7332, - "step": 4497 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7441094157792543e-05, - "loss": 0.9103, - "step": 4498 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.743976154495345e-05, - "loss": 0.8093, - "step": 4499 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7438428636147905e-05, - "loss": 0.8808, - "step": 4500 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.743709543142892e-05, - "loss": 0.8874, - "step": 4501 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.743576193084954e-05, - "loss": 0.814, - "step": 4502 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.743442813446281e-05, - "loss": 0.8131, - "step": 4503 - }, - { - "epoch": 0.81, - "grad_norm": 0.0, - "learning_rate": 1.7433094042321796e-05, - "loss": 0.8295, - "step": 4504 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7431759654479562e-05, - "loss": 0.7948, - "step": 4505 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7430424970989203e-05, - "loss": 0.8606, - "step": 4506 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7429089991903807e-05, - "loss": 0.7375, - "step": 4507 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7427754717276485e-05, - "loss": 0.8904, - "step": 4508 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.742641914716036e-05, - "loss": 0.874, - "step": 4509 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7425083281608555e-05, - "loss": 0.8212, - "step": 4510 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7423747120674218e-05, - "loss": 0.7742, - "step": 4511 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.74224106644105e-05, - "loss": 0.8146, - "step": 4512 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7421073912870573e-05, - "loss": 0.7875, - "step": 4513 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7419736866107615e-05, - "loss": 0.7884, - "step": 4514 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7418399524174813e-05, - "loss": 0.8143, - "step": 4515 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7417061887125368e-05, - "loss": 0.8086, - "step": 4516 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7415723955012493e-05, - "loss": 0.7585, - "step": 4517 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7414385727889418e-05, - "loss": 0.8439, - "step": 4518 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7413047205809378e-05, - "loss": 0.745, - "step": 4519 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.741170838882562e-05, - "loss": 0.7729, - "step": 4520 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7410369276991406e-05, - "loss": 0.8056, - "step": 4521 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7409029870360002e-05, - "loss": 0.9196, - "step": 4522 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7407690168984705e-05, - "loss": 0.7542, - "step": 4523 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7406350172918798e-05, - "loss": 0.8429, - "step": 4524 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7405009882215594e-05, - "loss": 0.8129, - "step": 4525 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7403669296928415e-05, - "loss": 0.8615, - "step": 4526 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7402328417110585e-05, - "loss": 0.8763, - "step": 4527 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.740098724281545e-05, - "loss": 0.7201, - "step": 4528 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.739964577409636e-05, - "loss": 0.8308, - "step": 4529 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7398304011006692e-05, - "loss": 0.9033, - "step": 4530 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7396961953599815e-05, - "loss": 0.9455, - "step": 4531 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7395619601929116e-05, - "loss": 0.785, - "step": 4532 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7394276956048003e-05, - "loss": 0.8573, - "step": 4533 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7392934016009885e-05, - "loss": 0.8218, - "step": 4534 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.739159078186819e-05, - "loss": 0.7367, - "step": 4535 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7390247253676346e-05, - "loss": 0.7393, - "step": 4536 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7388903431487806e-05, - "loss": 0.7472, - "step": 4537 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7387559315356032e-05, - "loss": 0.794, - "step": 4538 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7386214905334494e-05, - "loss": 0.8597, - "step": 4539 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7384870201476675e-05, - "loss": 0.7637, - "step": 4540 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7383525203836062e-05, - "loss": 0.8743, - "step": 4541 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.738217991246617e-05, - "loss": 0.8925, - "step": 4542 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7380834327420516e-05, - "loss": 0.8261, - "step": 4543 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.737948844875263e-05, - "loss": 0.8965, - "step": 4544 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.737814227651605e-05, - "loss": 0.9255, - "step": 4545 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.737679581076433e-05, - "loss": 0.8942, - "step": 4546 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7375449051551035e-05, - "loss": 0.713, - "step": 4547 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7374101998929746e-05, - "loss": 0.7945, - "step": 4548 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7372754652954045e-05, - "loss": 0.7446, - "step": 4549 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7371407013677533e-05, - "loss": 0.8284, - "step": 4550 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.737005908115382e-05, - "loss": 0.9373, - "step": 4551 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7368710855436533e-05, - "loss": 0.7337, - "step": 4552 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7367362336579304e-05, - "loss": 0.7872, - "step": 4553 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7366013524635784e-05, - "loss": 0.7553, - "step": 4554 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7364664419659626e-05, - "loss": 0.7704, - "step": 4555 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7363315021704496e-05, - "loss": 0.9377, - "step": 4556 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.7361965330824088e-05, - "loss": 0.9455, - "step": 4557 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.736061534707208e-05, - "loss": 0.9075, - "step": 4558 - }, - { - "epoch": 0.82, - "grad_norm": 0.0, - "learning_rate": 1.735926507050219e-05, - "loss": 0.7114, - "step": 4559 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7357914501168124e-05, - "loss": 0.7859, - "step": 4560 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.735656363912362e-05, - "loss": 0.9416, - "step": 4561 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7355212484422407e-05, - "loss": 0.8125, - "step": 4562 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.735386103711824e-05, - "loss": 0.8951, - "step": 4563 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7352509297264886e-05, - "loss": 0.8064, - "step": 4564 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7351157264916114e-05, - "loss": 0.83, - "step": 4565 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7349804940125716e-05, - "loss": 0.8058, - "step": 4566 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7348452322947486e-05, - "loss": 0.9418, - "step": 4567 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7347099413435232e-05, - "loss": 0.8035, - "step": 4568 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.734574621164278e-05, - "loss": 0.7084, - "step": 4569 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.734439271762396e-05, - "loss": 0.8065, - "step": 4570 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.734303893143261e-05, - "loss": 0.8356, - "step": 4571 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.73416848531226e-05, - "loss": 0.8564, - "step": 4572 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7340330482747783e-05, - "loss": 0.7529, - "step": 4573 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.733897582036205e-05, - "loss": 0.8974, - "step": 4574 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7337620866019282e-05, - "loss": 0.8305, - "step": 4575 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7336265619773387e-05, - "loss": 0.6524, - "step": 4576 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.733491008167828e-05, - "loss": 0.7179, - "step": 4577 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7333554251787883e-05, - "loss": 0.755, - "step": 4578 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7332198130156134e-05, - "loss": 0.7879, - "step": 4579 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7330841716836982e-05, - "loss": 0.8479, - "step": 4580 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7329485011884394e-05, - "loss": 0.834, - "step": 4581 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.732812801535233e-05, - "loss": 0.7545, - "step": 4582 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.732677072729478e-05, - "loss": 0.727, - "step": 4583 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7325413147765742e-05, - "loss": 0.8994, - "step": 4584 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.732405527681922e-05, - "loss": 0.9114, - "step": 4585 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.732269711450923e-05, - "loss": 0.8767, - "step": 4586 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7321338660889807e-05, - "loss": 0.9349, - "step": 4587 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7319979916014985e-05, - "loss": 0.657, - "step": 4588 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7318620879938824e-05, - "loss": 0.9583, - "step": 4589 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.731726155271539e-05, - "loss": 0.8076, - "step": 4590 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7315901934398755e-05, - "loss": 0.8024, - "step": 4591 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.731454202504301e-05, - "loss": 0.8798, - "step": 4592 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.731318182470225e-05, - "loss": 0.8971, - "step": 4593 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7311821333430586e-05, - "loss": 0.7555, - "step": 4594 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7310460551282148e-05, - "loss": 0.8265, - "step": 4595 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7309099478311063e-05, - "loss": 0.7944, - "step": 4596 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7307738114571483e-05, - "loss": 0.8078, - "step": 4597 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.730637646011756e-05, - "loss": 0.6615, - "step": 4598 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7305014515003465e-05, - "loss": 0.8102, - "step": 4599 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7303652279283375e-05, - "loss": 0.7339, - "step": 4600 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7302289753011492e-05, - "loss": 0.8233, - "step": 4601 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.730092693624201e-05, - "loss": 0.8019, - "step": 4602 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7299563829029145e-05, - "loss": 0.7814, - "step": 4603 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7298200431427128e-05, - "loss": 0.6948, - "step": 4604 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7296836743490195e-05, - "loss": 0.8023, - "step": 4605 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7295472765272598e-05, - "loss": 0.8633, - "step": 4606 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7294108496828594e-05, - "loss": 0.7612, - "step": 4607 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.729274393821246e-05, - "loss": 0.9102, - "step": 4608 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7291379089478478e-05, - "loss": 0.7258, - "step": 4609 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7290013950680948e-05, - "loss": 0.6894, - "step": 4610 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.728864852187417e-05, - "loss": 0.8161, - "step": 4611 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.728728280311247e-05, - "loss": 0.9585, - "step": 4612 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.728591679445018e-05, - "loss": 0.6858, - "step": 4613 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7284550495941633e-05, - "loss": 0.7599, - "step": 4614 - }, - { - "epoch": 0.83, - "grad_norm": 0.0, - "learning_rate": 1.7283183907641188e-05, - "loss": 0.6735, - "step": 4615 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7281817029603213e-05, - "loss": 0.8316, - "step": 4616 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7280449861882084e-05, - "loss": 0.8179, - "step": 4617 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.727908240453218e-05, - "loss": 0.9184, - "step": 4618 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7277714657607917e-05, - "loss": 0.6693, - "step": 4619 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7276346621163692e-05, - "loss": 0.6744, - "step": 4620 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7274978295253933e-05, - "loss": 0.7118, - "step": 4621 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7273609679933077e-05, - "loss": 0.8152, - "step": 4622 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7272240775255568e-05, - "loss": 0.9213, - "step": 4623 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.727087158127586e-05, - "loss": 0.6887, - "step": 4624 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.726950209804843e-05, - "loss": 0.8589, - "step": 4625 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7268132325627748e-05, - "loss": 0.9533, - "step": 4626 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7266762264068313e-05, - "loss": 1.0337, - "step": 4627 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7265391913424624e-05, - "loss": 0.7942, - "step": 4628 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7264021273751203e-05, - "loss": 0.7785, - "step": 4629 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.726265034510257e-05, - "loss": 0.7717, - "step": 4630 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7261279127533264e-05, - "loss": 0.7892, - "step": 4631 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7259907621097835e-05, - "loss": 0.7402, - "step": 4632 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7258535825850845e-05, - "loss": 0.8611, - "step": 4633 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7257163741846865e-05, - "loss": 0.7793, - "step": 4634 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7255791369140476e-05, - "loss": 0.8534, - "step": 4635 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7254418707786282e-05, - "loss": 0.8926, - "step": 4636 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.725304575783888e-05, - "loss": 0.762, - "step": 4637 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7251672519352895e-05, - "loss": 0.8606, - "step": 4638 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.725029899238295e-05, - "loss": 0.862, - "step": 4639 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7248925176983697e-05, - "loss": 0.7824, - "step": 4640 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.724755107320978e-05, - "loss": 0.7408, - "step": 4641 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7246176681115865e-05, - "loss": 0.733, - "step": 4642 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7244802000756628e-05, - "loss": 0.8393, - "step": 4643 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7243427032186756e-05, - "loss": 0.7097, - "step": 4644 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7242051775460948e-05, - "loss": 0.7337, - "step": 4645 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7240676230633916e-05, - "loss": 0.7509, - "step": 4646 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7239300397760373e-05, - "loss": 0.7955, - "step": 4647 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7237924276895066e-05, - "loss": 0.9137, - "step": 4648 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.723654786809273e-05, - "loss": 0.8334, - "step": 4649 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7235171171408123e-05, - "loss": 0.8614, - "step": 4650 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7233794186896012e-05, - "loss": 0.7793, - "step": 4651 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7232416914611175e-05, - "loss": 0.6825, - "step": 4652 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7231039354608407e-05, - "loss": 0.7433, - "step": 4653 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7229661506942502e-05, - "loss": 0.8186, - "step": 4654 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7228283371668278e-05, - "loss": 0.7619, - "step": 4655 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7226904948840558e-05, - "loss": 0.814, - "step": 4656 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7225526238514182e-05, - "loss": 0.8117, - "step": 4657 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.722414724074399e-05, - "loss": 0.9402, - "step": 4658 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7222767955584848e-05, - "loss": 0.7964, - "step": 4659 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7221388383091624e-05, - "loss": 0.7891, - "step": 4660 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7220008523319198e-05, - "loss": 0.8134, - "step": 4661 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7218628376322462e-05, - "loss": 0.7439, - "step": 4662 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7217247942156326e-05, - "loss": 0.958, - "step": 4663 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.72158672208757e-05, - "loss": 0.7165, - "step": 4664 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.721448621253552e-05, - "loss": 0.9694, - "step": 4665 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7213104917190718e-05, - "loss": 0.7991, - "step": 4666 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.721172333489624e-05, - "loss": 0.7803, - "step": 4667 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.721034146570706e-05, - "loss": 0.7688, - "step": 4668 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.720895930967814e-05, - "loss": 0.7964, - "step": 4669 - }, - { - "epoch": 0.84, - "grad_norm": 0.0, - "learning_rate": 1.7207576866864474e-05, - "loss": 0.874, - "step": 4670 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7206194137321047e-05, - "loss": 0.7545, - "step": 4671 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7204811121102877e-05, - "loss": 0.869, - "step": 4672 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7203427818264973e-05, - "loss": 0.8202, - "step": 4673 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7202044228862376e-05, - "loss": 0.6978, - "step": 4674 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7200660352950118e-05, - "loss": 0.9002, - "step": 4675 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7199276190583258e-05, - "loss": 0.9754, - "step": 4676 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7197891741816858e-05, - "loss": 0.8835, - "step": 4677 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.719650700670599e-05, - "loss": 0.7379, - "step": 4678 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7195121985305748e-05, - "loss": 0.7843, - "step": 4679 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7193736677671226e-05, - "loss": 0.7911, - "step": 4680 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7192351083857537e-05, - "loss": 0.7011, - "step": 4681 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7190965203919804e-05, - "loss": 0.9065, - "step": 4682 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.718957903791315e-05, - "loss": 0.6896, - "step": 4683 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7188192585892728e-05, - "loss": 0.7188, - "step": 4684 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7186805847913692e-05, - "loss": 0.7831, - "step": 4685 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7185418824031204e-05, - "loss": 0.7391, - "step": 4686 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.718403151430045e-05, - "loss": 0.7168, - "step": 4687 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7182643918776616e-05, - "loss": 0.7028, - "step": 4688 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.71812560375149e-05, - "loss": 0.8048, - "step": 4689 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7179867870570517e-05, - "loss": 0.7782, - "step": 4690 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7178479417998692e-05, - "loss": 0.831, - "step": 4691 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7177090679854655e-05, - "loss": 0.829, - "step": 4692 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7175701656193658e-05, - "loss": 0.6722, - "step": 4693 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7174312347070956e-05, - "loss": 0.771, - "step": 4694 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7172922752541817e-05, - "loss": 0.949, - "step": 4695 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7171532872661526e-05, - "loss": 0.6704, - "step": 4696 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.717014270748537e-05, - "loss": 0.8201, - "step": 4697 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7168752257068654e-05, - "loss": 0.766, - "step": 4698 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7167361521466694e-05, - "loss": 0.8298, - "step": 4699 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7165970500734812e-05, - "loss": 0.8613, - "step": 4700 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7164579194928347e-05, - "loss": 0.9482, - "step": 4701 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7163187604102647e-05, - "loss": 0.8268, - "step": 4702 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.716179572831308e-05, - "loss": 0.802, - "step": 4703 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7160403567615003e-05, - "loss": 0.7319, - "step": 4704 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.715901112206381e-05, - "loss": 0.7527, - "step": 4705 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.715761839171489e-05, - "loss": 0.7249, - "step": 4706 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7156225376623644e-05, - "loss": 0.8618, - "step": 4707 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7154832076845497e-05, - "loss": 0.7227, - "step": 4708 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7153438492435873e-05, - "loss": 0.8734, - "step": 4709 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.715204462345021e-05, - "loss": 0.8064, - "step": 4710 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.715065046994396e-05, - "loss": 0.8484, - "step": 4711 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.714925603197259e-05, - "loss": 0.7313, - "step": 4712 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.714786130959156e-05, - "loss": 0.8679, - "step": 4713 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7146466302856366e-05, - "loss": 0.7451, - "step": 4714 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.71450710118225e-05, - "loss": 0.7385, - "step": 4715 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7143675436545465e-05, - "loss": 0.7977, - "step": 4716 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.714227957708079e-05, - "loss": 0.87, - "step": 4717 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7140883433483992e-05, - "loss": 0.8358, - "step": 4718 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.713948700581062e-05, - "loss": 0.7904, - "step": 4719 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7138090294116226e-05, - "loss": 0.8624, - "step": 4720 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.713669329845637e-05, - "loss": 0.7358, - "step": 4721 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.713529601888663e-05, - "loss": 0.669, - "step": 4722 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.713389845546259e-05, - "loss": 0.7529, - "step": 4723 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7132500608239847e-05, - "loss": 0.8438, - "step": 4724 - }, - { - "epoch": 0.85, - "grad_norm": 0.0, - "learning_rate": 1.7131102477274014e-05, - "loss": 0.8128, - "step": 4725 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7129704062620706e-05, - "loss": 0.7358, - "step": 4726 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7128305364335556e-05, - "loss": 0.8885, - "step": 4727 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7126906382474207e-05, - "loss": 0.9256, - "step": 4728 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7125507117092316e-05, - "loss": 0.9463, - "step": 4729 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7124107568245544e-05, - "loss": 0.7783, - "step": 4730 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7122707735989568e-05, - "loss": 0.8513, - "step": 4731 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7121307620380077e-05, - "loss": 0.8633, - "step": 4732 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.711990722147277e-05, - "loss": 0.7867, - "step": 4733 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7118506539323355e-05, - "loss": 0.8209, - "step": 4734 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7117105573987556e-05, - "loss": 0.9544, - "step": 4735 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7115704325521108e-05, - "loss": 0.9206, - "step": 4736 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7114302793979745e-05, - "loss": 0.8838, - "step": 4737 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7112900979419237e-05, - "loss": 0.8702, - "step": 4738 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7111498881895338e-05, - "loss": 0.8169, - "step": 4739 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7110096501463833e-05, - "loss": 0.9054, - "step": 4740 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7108693838180506e-05, - "loss": 0.884, - "step": 4741 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7107290892101162e-05, - "loss": 0.6519, - "step": 4742 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7105887663281614e-05, - "loss": 0.8398, - "step": 4743 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7104484151777677e-05, - "loss": 0.8909, - "step": 4744 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7103080357645192e-05, - "loss": 0.9782, - "step": 4745 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7101676280940002e-05, - "loss": 0.7988, - "step": 4746 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7100271921717963e-05, - "loss": 0.658, - "step": 4747 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7098867280034947e-05, - "loss": 0.8263, - "step": 4748 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7097462355946825e-05, - "loss": 0.8392, - "step": 4749 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7096057149509494e-05, - "loss": 0.783, - "step": 4750 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7094651660778852e-05, - "loss": 0.7822, - "step": 4751 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7093245889810816e-05, - "loss": 0.8269, - "step": 4752 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7091839836661305e-05, - "loss": 0.7728, - "step": 4753 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7090433501386254e-05, - "loss": 0.9036, - "step": 4754 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7089026884041612e-05, - "loss": 0.7635, - "step": 4755 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.708761998468334e-05, - "loss": 0.8471, - "step": 4756 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.70862128033674e-05, - "loss": 0.7998, - "step": 4757 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.708480534014978e-05, - "loss": 0.8799, - "step": 4758 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7083397595086465e-05, - "loss": 0.9512, - "step": 4759 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7081989568233455e-05, - "loss": 0.8853, - "step": 4760 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.708058125964677e-05, - "loss": 0.7869, - "step": 4761 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7079172669382434e-05, - "loss": 0.8694, - "step": 4762 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.707776379749648e-05, - "loss": 0.7334, - "step": 4763 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.707635464404496e-05, - "loss": 0.7987, - "step": 4764 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.707494520908393e-05, - "loss": 0.8839, - "step": 4765 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7073535492669457e-05, - "loss": 0.9434, - "step": 4766 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7072125494857624e-05, - "loss": 0.7433, - "step": 4767 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7070715215704526e-05, - "loss": 0.7924, - "step": 4768 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.706930465526626e-05, - "loss": 0.8234, - "step": 4769 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.706789381359895e-05, - "loss": 0.6126, - "step": 4770 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7066482690758713e-05, - "loss": 0.8348, - "step": 4771 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.706507128680169e-05, - "loss": 0.7481, - "step": 4772 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7063659601784026e-05, - "loss": 0.8101, - "step": 4773 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7062247635761886e-05, - "loss": 0.8051, - "step": 4774 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7060835388791433e-05, - "loss": 0.7654, - "step": 4775 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7059422860928853e-05, - "loss": 0.7135, - "step": 4776 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.705801005223034e-05, - "loss": 0.7678, - "step": 4777 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7056596962752096e-05, - "loss": 0.767, - "step": 4778 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7055183592550334e-05, - "loss": 0.7949, - "step": 4779 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7053769941681287e-05, - "loss": 0.6937, - "step": 4780 - }, - { - "epoch": 0.86, - "grad_norm": 0.0, - "learning_rate": 1.7052356010201183e-05, - "loss": 0.7789, - "step": 4781 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7050941798166277e-05, - "loss": 0.8056, - "step": 4782 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.704952730563283e-05, - "loss": 0.797, - "step": 4783 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7048112532657106e-05, - "loss": 0.6999, - "step": 4784 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7046697479295395e-05, - "loss": 0.7991, - "step": 4785 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7045282145603983e-05, - "loss": 0.7783, - "step": 4786 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.704386653163918e-05, - "loss": 0.7518, - "step": 4787 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.70424506374573e-05, - "loss": 0.8755, - "step": 4788 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.704103446311467e-05, - "loss": 0.8214, - "step": 4789 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7039618008667625e-05, - "loss": 0.8387, - "step": 4790 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7038201274172515e-05, - "loss": 0.6966, - "step": 4791 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7036784259685704e-05, - "loss": 0.871, - "step": 4792 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.703536696526356e-05, - "loss": 0.7492, - "step": 4793 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7033949390962467e-05, - "loss": 0.7873, - "step": 4794 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7032531536838814e-05, - "loss": 0.8058, - "step": 4795 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7031113402949012e-05, - "loss": 0.9645, - "step": 4796 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7029694989349472e-05, - "loss": 0.8152, - "step": 4797 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7028276296096625e-05, - "loss": 0.8513, - "step": 4798 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7026857323246902e-05, - "loss": 0.7315, - "step": 4799 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7025438070856764e-05, - "loss": 0.852, - "step": 4800 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.702401853898266e-05, - "loss": 0.7377, - "step": 4801 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7022598727681065e-05, - "loss": 0.7136, - "step": 4802 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7021178637008464e-05, - "loss": 0.7528, - "step": 4803 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7019758267021347e-05, - "loss": 0.9343, - "step": 4804 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7018337617776223e-05, - "loss": 0.8703, - "step": 4805 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7016916689329605e-05, - "loss": 0.7629, - "step": 4806 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.701549548173802e-05, - "loss": 0.8164, - "step": 4807 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7014073995058006e-05, - "loss": 0.8613, - "step": 4808 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7012652229346116e-05, - "loss": 0.6899, - "step": 4809 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.70112301846589e-05, - "loss": 0.8052, - "step": 4810 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7009807861052942e-05, - "loss": 0.759, - "step": 4811 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7008385258584817e-05, - "loss": 0.7215, - "step": 4812 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7006962377311122e-05, - "loss": 0.7853, - "step": 4813 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.700553921728846e-05, - "loss": 0.7833, - "step": 4814 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7004115778573447e-05, - "loss": 0.8619, - "step": 4815 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.700269206122271e-05, - "loss": 0.6859, - "step": 4816 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.7001268065292883e-05, - "loss": 0.8027, - "step": 4817 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.699984379084062e-05, - "loss": 0.7373, - "step": 4818 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.699841923792258e-05, - "loss": 0.7192, - "step": 4819 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6996994406595432e-05, - "loss": 0.7607, - "step": 4820 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6995569296915865e-05, - "loss": 0.8367, - "step": 4821 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.699414390894056e-05, - "loss": 0.7272, - "step": 4822 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6992718242726236e-05, - "loss": 0.6378, - "step": 4823 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6991292298329594e-05, - "loss": 0.8805, - "step": 4824 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6989866075807374e-05, - "loss": 0.7444, - "step": 4825 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.69884395752163e-05, - "loss": 0.8556, - "step": 4826 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6987012796613136e-05, - "loss": 0.8837, - "step": 4827 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6985585740054632e-05, - "loss": 0.6894, - "step": 4828 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6984158405597557e-05, - "loss": 0.7377, - "step": 4829 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6982730793298696e-05, - "loss": 0.9394, - "step": 4830 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6981302903214845e-05, - "loss": 0.8336, - "step": 4831 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6979874735402802e-05, - "loss": 0.8206, - "step": 4832 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6978446289919387e-05, - "loss": 0.9048, - "step": 4833 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6977017566821424e-05, - "loss": 0.801, - "step": 4834 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.6975588566165747e-05, - "loss": 0.8561, - "step": 4835 - }, - { - "epoch": 0.87, - "grad_norm": 0.0, - "learning_rate": 1.697415928800921e-05, - "loss": 0.8066, - "step": 4836 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6972729732408665e-05, - "loss": 0.7858, - "step": 4837 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.697129989942099e-05, - "loss": 0.9084, - "step": 4838 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6969869789103063e-05, - "loss": 0.81, - "step": 4839 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6968439401511772e-05, - "loss": 0.9057, - "step": 4840 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.696700873670403e-05, - "loss": 0.7199, - "step": 4841 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.696557779473674e-05, - "loss": 0.8387, - "step": 4842 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6964146575666835e-05, - "loss": 0.7922, - "step": 4843 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6962715079551248e-05, - "loss": 0.764, - "step": 4844 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.696128330644693e-05, - "loss": 0.837, - "step": 4845 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.695985125641083e-05, - "loss": 0.8966, - "step": 4846 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6958418929499932e-05, - "loss": 0.8673, - "step": 4847 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6956986325771205e-05, - "loss": 0.8996, - "step": 4848 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.695555344528164e-05, - "loss": 0.73, - "step": 4849 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.695412028808825e-05, - "loss": 0.7182, - "step": 4850 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.695268685424804e-05, - "loss": 0.7467, - "step": 4851 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6951253143818035e-05, - "loss": 0.8366, - "step": 4852 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6949819156855272e-05, - "loss": 0.8585, - "step": 4853 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.69483848934168e-05, - "loss": 0.8566, - "step": 4854 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.694695035355967e-05, - "loss": 0.8457, - "step": 4855 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6945515537340958e-05, - "loss": 0.7954, - "step": 4856 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6944080444817737e-05, - "loss": 0.7827, - "step": 4857 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6942645076047098e-05, - "loss": 0.8112, - "step": 4858 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6941209431086147e-05, - "loss": 0.7537, - "step": 4859 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6939773509991995e-05, - "loss": 0.8362, - "step": 4860 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6938337312821764e-05, - "loss": 0.7879, - "step": 4861 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6936900839632583e-05, - "loss": 0.9158, - "step": 4862 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.693546409048161e-05, - "loss": 0.8082, - "step": 4863 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6934027065425992e-05, - "loss": 1.0573, - "step": 4864 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.69325897645229e-05, - "loss": 0.8802, - "step": 4865 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.693115218782951e-05, - "loss": 0.7431, - "step": 4866 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.692971433540301e-05, - "loss": 0.7278, - "step": 4867 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6928276207300603e-05, - "loss": 0.9438, - "step": 4868 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6926837803579503e-05, - "loss": 0.7058, - "step": 4869 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6925399124296928e-05, - "loss": 0.8735, - "step": 4870 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.692396016951011e-05, - "loss": 0.8102, - "step": 4871 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6922520939276296e-05, - "loss": 0.763, - "step": 4872 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.692108143365274e-05, - "loss": 0.781, - "step": 4873 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6919641652696706e-05, - "loss": 0.8354, - "step": 4874 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6918201596465476e-05, - "loss": 0.7873, - "step": 4875 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6916761265016335e-05, - "loss": 0.8365, - "step": 4876 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.691532065840658e-05, - "loss": 0.8449, - "step": 4877 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6913879776693522e-05, - "loss": 0.7218, - "step": 4878 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6912438619934485e-05, - "loss": 0.828, - "step": 4879 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6910997188186797e-05, - "loss": 0.8097, - "step": 4880 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6909555481507803e-05, - "loss": 0.8378, - "step": 4881 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.690811349995485e-05, - "loss": 0.7696, - "step": 4882 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6906671243585313e-05, - "loss": 0.8128, - "step": 4883 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.690522871245656e-05, - "loss": 1.0016, - "step": 4884 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.690378590662598e-05, - "loss": 0.7969, - "step": 4885 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.690234282615097e-05, - "loss": 0.867, - "step": 4886 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6900899471088937e-05, - "loss": 0.7939, - "step": 4887 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6899455841497306e-05, - "loss": 0.7317, - "step": 4888 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6898011937433497e-05, - "loss": 0.8379, - "step": 4889 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.6896567758954958e-05, - "loss": 0.7736, - "step": 4890 - }, - { - "epoch": 0.88, - "grad_norm": 0.0, - "learning_rate": 1.689512330611914e-05, - "loss": 0.8055, - "step": 4891 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.68936785789835e-05, - "loss": 0.9764, - "step": 4892 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.689223357760552e-05, - "loss": 0.8071, - "step": 4893 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.689078830204268e-05, - "loss": 0.7538, - "step": 4894 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.688934275235248e-05, - "loss": 0.921, - "step": 4895 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6887896928592422e-05, - "loss": 0.9291, - "step": 4896 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6886450830820023e-05, - "loss": 0.8519, - "step": 4897 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6885004459092816e-05, - "loss": 0.8748, - "step": 4898 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6883557813468333e-05, - "loss": 0.7849, - "step": 4899 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.688211089400413e-05, - "loss": 0.7661, - "step": 4900 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6880663700757765e-05, - "loss": 0.7354, - "step": 4901 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.687921623378681e-05, - "loss": 0.8179, - "step": 4902 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6877768493148852e-05, - "loss": 0.7929, - "step": 4903 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.687632047890148e-05, - "loss": 0.7717, - "step": 4904 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6874872191102295e-05, - "loss": 0.775, - "step": 4905 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6873423629808922e-05, - "loss": 0.6823, - "step": 4906 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6871974795078978e-05, - "loss": 0.7598, - "step": 4907 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6870525686970106e-05, - "loss": 0.8742, - "step": 4908 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6869076305539955e-05, - "loss": 0.7103, - "step": 4909 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.686762665084618e-05, - "loss": 0.6521, - "step": 4910 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.686617672294645e-05, - "loss": 0.7385, - "step": 4911 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.686472652189845e-05, - "loss": 0.9626, - "step": 4912 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6863276047759867e-05, - "loss": 0.7652, - "step": 4913 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6861825300588405e-05, - "loss": 0.8094, - "step": 4914 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6860374280441778e-05, - "loss": 0.8855, - "step": 4915 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.685892298737771e-05, - "loss": 0.7661, - "step": 4916 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6857471421453938e-05, - "loss": 0.8131, - "step": 4917 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6856019582728206e-05, - "loss": 0.8243, - "step": 4918 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6854567471258265e-05, - "loss": 0.7609, - "step": 4919 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.685311508710189e-05, - "loss": 0.7776, - "step": 4920 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6851662430316857e-05, - "loss": 0.8045, - "step": 4921 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6850209500960955e-05, - "loss": 0.8465, - "step": 4922 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6848756299091982e-05, - "loss": 0.6938, - "step": 4923 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.684730282476775e-05, - "loss": 1.0064, - "step": 4924 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6845849078046085e-05, - "loss": 0.7985, - "step": 4925 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6844395058984817e-05, - "loss": 0.7273, - "step": 4926 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6842940767641783e-05, - "loss": 0.6835, - "step": 4927 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6841486204074848e-05, - "loss": 0.8445, - "step": 4928 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.684003136834187e-05, - "loss": 0.7264, - "step": 4929 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6838576260500727e-05, - "loss": 0.8325, - "step": 4930 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6837120880609303e-05, - "loss": 0.7716, - "step": 4931 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.68356652287255e-05, - "loss": 0.6971, - "step": 4932 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6834209304907223e-05, - "loss": 0.8612, - "step": 4933 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6832753109212394e-05, - "loss": 0.7677, - "step": 4934 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.683129664169894e-05, - "loss": 0.6462, - "step": 4935 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6829839902424802e-05, - "loss": 0.8169, - "step": 4936 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6828382891447933e-05, - "loss": 0.8014, - "step": 4937 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6826925608826295e-05, - "loss": 0.852, - "step": 4938 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.682546805461786e-05, - "loss": 0.8504, - "step": 4939 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6824010228880612e-05, - "loss": 0.6894, - "step": 4940 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.682255213167255e-05, - "loss": 0.8527, - "step": 4941 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.682109376305168e-05, - "loss": 0.7725, - "step": 4942 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6819635123076008e-05, - "loss": 0.7943, - "step": 4943 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6818176211803572e-05, - "loss": 0.9307, - "step": 4944 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6816717029292403e-05, - "loss": 0.7672, - "step": 4945 - }, - { - "epoch": 0.89, - "grad_norm": 0.0, - "learning_rate": 1.6815257575600555e-05, - "loss": 0.8229, - "step": 4946 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6813797850786086e-05, - "loss": 0.9535, - "step": 4947 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6812337854907067e-05, - "loss": 0.7871, - "step": 4948 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6810877588021574e-05, - "loss": 0.7361, - "step": 4949 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6809417050187704e-05, - "loss": 0.9092, - "step": 4950 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.680795624146356e-05, - "loss": 0.9823, - "step": 4951 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6806495161907253e-05, - "loss": 0.7835, - "step": 4952 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.680503381157691e-05, - "loss": 0.6768, - "step": 4953 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6803572190530662e-05, - "loss": 0.7588, - "step": 4954 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6802110298826658e-05, - "loss": 0.8513, - "step": 4955 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6800648136523054e-05, - "loss": 0.7612, - "step": 4956 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6799185703678017e-05, - "loss": 0.8042, - "step": 4957 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6797723000349723e-05, - "loss": 0.9143, - "step": 4958 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6796260026596368e-05, - "loss": 0.7441, - "step": 4959 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6794796782476144e-05, - "loss": 0.7829, - "step": 4960 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.679333326804726e-05, - "loss": 0.7304, - "step": 4961 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6791869483367943e-05, - "loss": 0.9812, - "step": 4962 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6790405428496426e-05, - "loss": 0.7923, - "step": 4963 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6788941103490946e-05, - "loss": 0.8789, - "step": 4964 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.678747650840976e-05, - "loss": 0.7637, - "step": 4965 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.678601164331113e-05, - "loss": 0.8621, - "step": 4966 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6784546508253332e-05, - "loss": 0.8818, - "step": 4967 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.678308110329465e-05, - "loss": 0.6348, - "step": 4968 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.678161542849338e-05, - "loss": 0.8139, - "step": 4969 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6780149483907835e-05, - "loss": 0.755, - "step": 4970 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.677868326959633e-05, - "loss": 0.7094, - "step": 4971 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6777216785617187e-05, - "loss": 0.8134, - "step": 4972 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.677575003202875e-05, - "loss": 0.8559, - "step": 4973 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6774283008889373e-05, - "loss": 0.8612, - "step": 4974 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6772815716257414e-05, - "loss": 0.9016, - "step": 4975 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.677134815419124e-05, - "loss": 0.726, - "step": 4976 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6769880322749235e-05, - "loss": 0.7154, - "step": 4977 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6768412221989798e-05, - "loss": 0.7852, - "step": 4978 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6766943851971323e-05, - "loss": 0.8648, - "step": 4979 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6765475212752232e-05, - "loss": 1.0873, - "step": 4980 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6764006304390946e-05, - "loss": 0.8127, - "step": 4981 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6762537126945904e-05, - "loss": 0.7873, - "step": 4982 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.676106768047555e-05, - "loss": 0.7961, - "step": 4983 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6759597965038338e-05, - "loss": 0.8666, - "step": 4984 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.675812798069274e-05, - "loss": 0.755, - "step": 4985 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6756657727497236e-05, - "loss": 0.8243, - "step": 4986 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.675518720551031e-05, - "loss": 0.7155, - "step": 4987 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6753716414790468e-05, - "loss": 0.7086, - "step": 4988 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6752245355396212e-05, - "loss": 0.8074, - "step": 4989 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6750774027386074e-05, - "loss": 0.7968, - "step": 4990 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.674930243081858e-05, - "loss": 0.7574, - "step": 4991 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.674783056575227e-05, - "loss": 0.8638, - "step": 4992 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.67463584322457e-05, - "loss": 0.8444, - "step": 4993 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.674488603035744e-05, - "loss": 0.8284, - "step": 4994 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6743413360146058e-05, - "loss": 0.8243, - "step": 4995 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6741940421670134e-05, - "loss": 0.824, - "step": 4996 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6740467214988275e-05, - "loss": 0.9632, - "step": 4997 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.673899374015908e-05, - "loss": 0.6446, - "step": 4998 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6737519997241175e-05, - "loss": 0.828, - "step": 4999 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.673604598629318e-05, - "loss": 0.8204, - "step": 5000 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.6734571707373736e-05, - "loss": 0.8068, - "step": 5001 - }, - { - "epoch": 0.9, - "grad_norm": 0.0, - "learning_rate": 1.673309716054149e-05, - "loss": 0.8003, - "step": 5002 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6731622345855107e-05, - "loss": 0.9149, - "step": 5003 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6730147263373256e-05, - "loss": 0.6446, - "step": 5004 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6728671913154615e-05, - "loss": 0.7522, - "step": 5005 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.672719629525788e-05, - "loss": 0.7441, - "step": 5006 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6725720409741752e-05, - "loss": 0.6983, - "step": 5007 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6724244256664944e-05, - "loss": 0.8338, - "step": 5008 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6722767836086183e-05, - "loss": 0.6729, - "step": 5009 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6721291148064196e-05, - "loss": 0.768, - "step": 5010 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6719814192657736e-05, - "loss": 0.7717, - "step": 5011 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6718336969925557e-05, - "loss": 0.8257, - "step": 5012 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.671685947992642e-05, - "loss": 0.9188, - "step": 5013 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6715381722719106e-05, - "loss": 0.7203, - "step": 5014 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6713903698362406e-05, - "loss": 0.8672, - "step": 5015 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6712425406915117e-05, - "loss": 0.7929, - "step": 5016 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6710946848436042e-05, - "loss": 0.7771, - "step": 5017 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6709468022984004e-05, - "loss": 0.8427, - "step": 5018 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6707988930617837e-05, - "loss": 0.8434, - "step": 5019 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.670650957139638e-05, - "loss": 0.8245, - "step": 5020 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6705029945378483e-05, - "loss": 0.7623, - "step": 5021 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6703550052623006e-05, - "loss": 0.6886, - "step": 5022 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6702069893188825e-05, - "loss": 0.8498, - "step": 5023 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6700589467134825e-05, - "loss": 0.85, - "step": 5024 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6699108774519896e-05, - "loss": 0.7604, - "step": 5025 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6697627815402943e-05, - "loss": 0.8912, - "step": 5026 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.669614658984288e-05, - "loss": 0.8399, - "step": 5027 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6694665097898637e-05, - "loss": 0.6796, - "step": 5028 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6693183339629148e-05, - "loss": 0.786, - "step": 5029 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6691701315093357e-05, - "loss": 0.7263, - "step": 5030 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6690219024350226e-05, - "loss": 0.8714, - "step": 5031 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.668873646745872e-05, - "loss": 0.9774, - "step": 5032 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6687253644477822e-05, - "loss": 0.7162, - "step": 5033 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6685770555466514e-05, - "loss": 0.6742, - "step": 5034 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6684287200483805e-05, - "loss": 0.8882, - "step": 5035 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6682803579588695e-05, - "loss": 0.7395, - "step": 5036 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6681319692840216e-05, - "loss": 0.7659, - "step": 5037 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6679835540297385e-05, - "loss": 0.8327, - "step": 5038 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6678351122019262e-05, - "loss": 0.8162, - "step": 5039 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6676866438064888e-05, - "loss": 0.7012, - "step": 5040 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6675381488493327e-05, - "loss": 0.8191, - "step": 5041 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6673896273363657e-05, - "loss": 0.7143, - "step": 5042 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.667241079273496e-05, - "loss": 0.6545, - "step": 5043 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.667092504666633e-05, - "loss": 0.7466, - "step": 5044 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6669439035216875e-05, - "loss": 0.758, - "step": 5045 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.666795275844571e-05, - "loss": 0.8729, - "step": 5046 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6666466216411962e-05, - "loss": 0.7982, - "step": 5047 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6664979409174766e-05, - "loss": 0.7471, - "step": 5048 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.666349233679327e-05, - "loss": 0.8255, - "step": 5049 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6662004999326634e-05, - "loss": 0.723, - "step": 5050 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6660517396834025e-05, - "loss": 0.7153, - "step": 5051 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.665902952937463e-05, - "loss": 0.9175, - "step": 5052 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.665754139700763e-05, - "loss": 0.7145, - "step": 5053 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.6656052999792226e-05, - "loss": 0.7859, - "step": 5054 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.665456433778763e-05, - "loss": 0.7402, - "step": 5055 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.665307541105307e-05, - "loss": 0.8365, - "step": 5056 - }, - { - "epoch": 0.91, - "grad_norm": 0.0, - "learning_rate": 1.665158621964777e-05, - "loss": 0.8657, - "step": 5057 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6650096763630977e-05, - "loss": 0.6947, - "step": 5058 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.664860704306194e-05, - "loss": 0.6728, - "step": 5059 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6647117057999926e-05, - "loss": 0.8372, - "step": 5060 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6645626808504207e-05, - "loss": 0.9066, - "step": 5061 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6644136294634075e-05, - "loss": 0.9123, - "step": 5062 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.664264551644881e-05, - "loss": 0.7095, - "step": 5063 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.664115447400773e-05, - "loss": 0.9396, - "step": 5064 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6639663167370154e-05, - "loss": 0.8104, - "step": 5065 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6638171596595398e-05, - "loss": 0.8171, - "step": 5066 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6636679761742803e-05, - "loss": 0.7247, - "step": 5067 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6635187662871722e-05, - "loss": 0.8576, - "step": 5068 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6633695300041505e-05, - "loss": 0.8249, - "step": 5069 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.663220267331153e-05, - "loss": 0.8242, - "step": 5070 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6630709782741162e-05, - "loss": 0.8054, - "step": 5071 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6629216628389807e-05, - "loss": 0.8365, - "step": 5072 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6627723210316857e-05, - "loss": 0.8586, - "step": 5073 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.662622952858172e-05, - "loss": 0.8186, - "step": 5074 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6624735583243827e-05, - "loss": 0.7316, - "step": 5075 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.66232413743626e-05, - "loss": 0.7941, - "step": 5076 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6621746901997487e-05, - "loss": 0.81, - "step": 5077 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6620252166207935e-05, - "loss": 0.8671, - "step": 5078 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6618757167053413e-05, - "loss": 0.8633, - "step": 5079 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.661726190459339e-05, - "loss": 0.788, - "step": 5080 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6615766378887356e-05, - "loss": 0.9313, - "step": 5081 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.66142705899948e-05, - "loss": 0.9103, - "step": 5082 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6612774537975233e-05, - "loss": 0.8758, - "step": 5083 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6611278222888163e-05, - "loss": 0.8792, - "step": 5084 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6609781644793117e-05, - "loss": 0.7899, - "step": 5085 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6608284803749636e-05, - "loss": 0.7147, - "step": 5086 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.660678769981727e-05, - "loss": 0.8291, - "step": 5087 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6605290333055565e-05, - "loss": 0.782, - "step": 5088 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.66037927035241e-05, - "loss": 0.7554, - "step": 5089 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6602294811282443e-05, - "loss": 0.6973, - "step": 5090 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.660079665639019e-05, - "loss": 0.8574, - "step": 5091 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6599298238906937e-05, - "loss": 0.8731, - "step": 5092 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6597799558892294e-05, - "loss": 0.7508, - "step": 5093 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.659630061640589e-05, - "loss": 0.8063, - "step": 5094 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6594801411507337e-05, - "loss": 0.7525, - "step": 5095 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6593301944256292e-05, - "loss": 0.8319, - "step": 5096 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.65918022147124e-05, - "loss": 0.8813, - "step": 5097 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6590302222935323e-05, - "loss": 0.7969, - "step": 5098 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6588801968984735e-05, - "loss": 0.7899, - "step": 5099 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6587301452920317e-05, - "loss": 0.7549, - "step": 5100 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.658580067480176e-05, - "loss": 0.8535, - "step": 5101 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.658429963468877e-05, - "loss": 0.9359, - "step": 5102 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6582798332641064e-05, - "loss": 0.7287, - "step": 5103 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6581296768718364e-05, - "loss": 0.9278, - "step": 5104 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.65797949429804e-05, - "loss": 0.8678, - "step": 5105 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6578292855486927e-05, - "loss": 0.8281, - "step": 5106 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6576790506297694e-05, - "loss": 0.8189, - "step": 5107 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6575287895472462e-05, - "loss": 0.8011, - "step": 5108 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.657378502307102e-05, - "loss": 0.8734, - "step": 5109 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6572281889153143e-05, - "loss": 0.7949, - "step": 5110 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.657077849377864e-05, - "loss": 0.7392, - "step": 5111 - }, - { - "epoch": 0.92, - "grad_norm": 0.0, - "learning_rate": 1.6569274837007302e-05, - "loss": 0.7589, - "step": 5112 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6567770918898965e-05, - "loss": 0.7501, - "step": 5113 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.656626673951345e-05, - "loss": 0.7135, - "step": 5114 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6564762298910592e-05, - "loss": 0.8755, - "step": 5115 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6563257597150246e-05, - "loss": 0.8326, - "step": 5116 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6561752634292267e-05, - "loss": 0.8214, - "step": 5117 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.656024741039653e-05, - "loss": 0.725, - "step": 5118 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.655874192552291e-05, - "loss": 0.7424, - "step": 5119 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.65572361797313e-05, - "loss": 0.8695, - "step": 5120 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6555730173081608e-05, - "loss": 0.6844, - "step": 5121 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6554223905633734e-05, - "loss": 0.6653, - "step": 5122 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6552717377447603e-05, - "loss": 0.7404, - "step": 5123 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6551210588583153e-05, - "loss": 0.8547, - "step": 5124 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6549703539100323e-05, - "loss": 0.789, - "step": 5125 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6548196229059065e-05, - "loss": 0.854, - "step": 5126 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6546688658519343e-05, - "loss": 0.7771, - "step": 5127 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.654518082754113e-05, - "loss": 0.7817, - "step": 5128 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.654367273618441e-05, - "loss": 0.8823, - "step": 5129 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6542164384509184e-05, - "loss": 0.8718, - "step": 5130 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6540655772575447e-05, - "loss": 0.8766, - "step": 5131 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.653914690044322e-05, - "loss": 0.7092, - "step": 5132 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6537637768172527e-05, - "loss": 0.8916, - "step": 5133 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6536128375823405e-05, - "loss": 0.8109, - "step": 5134 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6534618723455897e-05, - "loss": 0.7936, - "step": 5135 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6533108811130064e-05, - "loss": 0.8636, - "step": 5136 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6531598638905972e-05, - "loss": 0.6851, - "step": 5137 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6530088206843694e-05, - "loss": 0.8504, - "step": 5138 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.652857751500332e-05, - "loss": 0.7654, - "step": 5139 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.652706656344495e-05, - "loss": 0.7754, - "step": 5140 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.652555535222869e-05, - "loss": 1.0041, - "step": 5141 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.652404388141466e-05, - "loss": 0.7965, - "step": 5142 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6522532151062987e-05, - "loss": 0.7831, - "step": 5143 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.652102016123381e-05, - "loss": 0.7458, - "step": 5144 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.651950791198728e-05, - "loss": 0.8565, - "step": 5145 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.651799540338356e-05, - "loss": 0.9063, - "step": 5146 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6516482635482817e-05, - "loss": 0.8923, - "step": 5147 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6514969608345228e-05, - "loss": 0.7682, - "step": 5148 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.651345632203099e-05, - "loss": 0.802, - "step": 5149 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.65119427766003e-05, - "loss": 0.7308, - "step": 5150 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.651042897211337e-05, - "loss": 0.7592, - "step": 5151 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6508914908630425e-05, - "loss": 0.82, - "step": 5152 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6507400586211693e-05, - "loss": 0.716, - "step": 5153 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6505886004917418e-05, - "loss": 0.8441, - "step": 5154 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6504371164807852e-05, - "loss": 0.8834, - "step": 5155 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6502856065943257e-05, - "loss": 0.8513, - "step": 5156 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6501340708383913e-05, - "loss": 0.8067, - "step": 5157 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6499825092190093e-05, - "loss": 0.8267, - "step": 5158 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6498309217422098e-05, - "loss": 0.8778, - "step": 5159 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6496793084140226e-05, - "loss": 0.7692, - "step": 5160 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.64952766924048e-05, - "loss": 0.6819, - "step": 5161 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.649376004227614e-05, - "loss": 0.7813, - "step": 5162 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6492243133814582e-05, - "loss": 0.6286, - "step": 5163 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.649072596708047e-05, - "loss": 0.677, - "step": 5164 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6489208542134158e-05, - "loss": 0.8121, - "step": 5165 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.6487690859036015e-05, - "loss": 0.755, - "step": 5166 - }, - { - "epoch": 0.93, - "grad_norm": 0.0, - "learning_rate": 1.648617291784641e-05, - "loss": 0.8261, - "step": 5167 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6484654718625743e-05, - "loss": 0.8239, - "step": 5168 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.64831362614344e-05, - "loss": 0.7488, - "step": 5169 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.648161754633279e-05, - "loss": 0.9009, - "step": 5170 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.648009857338133e-05, - "loss": 0.852, - "step": 5171 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6478579342640447e-05, - "loss": 0.8208, - "step": 5172 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6477059854170582e-05, - "loss": 0.7774, - "step": 5173 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6475540108032176e-05, - "loss": 0.8823, - "step": 5174 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6474020104285693e-05, - "loss": 0.8789, - "step": 5175 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.64724998429916e-05, - "loss": 0.77, - "step": 5176 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6470979324210376e-05, - "loss": 0.8128, - "step": 5177 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6469458548002508e-05, - "loss": 0.8, - "step": 5178 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6467937514428496e-05, - "loss": 0.7412, - "step": 5179 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.646641622354885e-05, - "loss": 0.8518, - "step": 5180 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.646489467542409e-05, - "loss": 0.8141, - "step": 5181 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6463372870114743e-05, - "loss": 0.7475, - "step": 5182 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6461850807681354e-05, - "loss": 0.7365, - "step": 5183 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6460328488184467e-05, - "loss": 0.7972, - "step": 5184 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6458805911684644e-05, - "loss": 0.8654, - "step": 5185 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6457283078242462e-05, - "loss": 0.7045, - "step": 5186 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6455759987918494e-05, - "loss": 0.7526, - "step": 5187 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6454236640773338e-05, - "loss": 0.7987, - "step": 5188 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6452713036867588e-05, - "loss": 0.9467, - "step": 5189 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.645118917626186e-05, - "loss": 0.7798, - "step": 5190 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6449665059016774e-05, - "loss": 0.8028, - "step": 5191 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6448140685192963e-05, - "loss": 0.7772, - "step": 5192 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6446616054851072e-05, - "loss": 0.9021, - "step": 5193 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.644509116805175e-05, - "loss": 0.7198, - "step": 5194 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6443566024855663e-05, - "loss": 0.8343, - "step": 5195 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6442040625323475e-05, - "loss": 0.8282, - "step": 5196 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6440514969515878e-05, - "loss": 0.9288, - "step": 5197 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.643898905749356e-05, - "loss": 0.7877, - "step": 5198 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.643746288931723e-05, - "loss": 0.7419, - "step": 5199 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6435936465047598e-05, - "loss": 0.8725, - "step": 5200 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6434409784745384e-05, - "loss": 0.9408, - "step": 5201 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6432882848471332e-05, - "loss": 0.7153, - "step": 5202 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.643135565628618e-05, - "loss": 0.8171, - "step": 5203 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6429828208250676e-05, - "loss": 0.8365, - "step": 5204 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6428300504425596e-05, - "loss": 0.877, - "step": 5205 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6426772544871707e-05, - "loss": 0.9315, - "step": 5206 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6425244329649797e-05, - "loss": 0.9376, - "step": 5207 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6423715858820662e-05, - "loss": 0.8965, - "step": 5208 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6422187132445105e-05, - "loss": 0.8789, - "step": 5209 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6420658150583944e-05, - "loss": 0.7748, - "step": 5210 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6419128913297998e-05, - "loss": 0.9182, - "step": 5211 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6417599420648113e-05, - "loss": 1.0017, - "step": 5212 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6416069672695124e-05, - "loss": 0.9506, - "step": 5213 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6414539669499897e-05, - "loss": 0.7698, - "step": 5214 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.641300941112329e-05, - "loss": 0.7572, - "step": 5215 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.641147889762618e-05, - "loss": 0.9047, - "step": 5216 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.640994812906946e-05, - "loss": 0.8317, - "step": 5217 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6408417105514024e-05, - "loss": 0.7818, - "step": 5218 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6406885827020773e-05, - "loss": 0.8035, - "step": 5219 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.6405354293650627e-05, - "loss": 0.8471, - "step": 5220 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.640382250546452e-05, - "loss": 0.8412, - "step": 5221 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.640229046252338e-05, - "loss": 0.9005, - "step": 5222 - }, - { - "epoch": 0.94, - "grad_norm": 0.0, - "learning_rate": 1.640075816488816e-05, - "loss": 0.7082, - "step": 5223 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6399225612619814e-05, - "loss": 0.6884, - "step": 5224 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6397692805779307e-05, - "loss": 0.7163, - "step": 5225 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6396159744427623e-05, - "loss": 0.8474, - "step": 5226 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6394626428625748e-05, - "loss": 0.8889, - "step": 5227 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.639309285843468e-05, - "loss": 0.7398, - "step": 5228 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.639155903391542e-05, - "loss": 0.9153, - "step": 5229 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6390024955129e-05, - "loss": 0.7607, - "step": 5230 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6388490622136437e-05, - "loss": 0.6856, - "step": 5231 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6386956034998775e-05, - "loss": 0.732, - "step": 5232 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6385421193777057e-05, - "loss": 0.8027, - "step": 5233 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.638388609853235e-05, - "loss": 0.7305, - "step": 5234 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6382350749325712e-05, - "loss": 0.751, - "step": 5235 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.638081514621823e-05, - "loss": 0.852, - "step": 5236 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6379279289270994e-05, - "loss": 0.8228, - "step": 5237 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6377743178545094e-05, - "loss": 0.8659, - "step": 5238 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.637620681410165e-05, - "loss": 0.8785, - "step": 5239 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6374670196001773e-05, - "loss": 0.7207, - "step": 5240 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6373133324306592e-05, - "loss": 0.831, - "step": 5241 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6371596199077252e-05, - "loss": 0.7891, - "step": 5242 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6370058820374906e-05, - "loss": 0.9051, - "step": 5243 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.63685211882607e-05, - "loss": 0.814, - "step": 5244 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6366983302795815e-05, - "loss": 0.7993, - "step": 5245 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6365445164041426e-05, - "loss": 0.901, - "step": 5246 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.636390677205872e-05, - "loss": 0.804, - "step": 5247 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6362368126908904e-05, - "loss": 0.7579, - "step": 5248 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6360829228653187e-05, - "loss": 0.7021, - "step": 5249 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6359290077352784e-05, - "loss": 0.7663, - "step": 5250 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.635775067306893e-05, - "loss": 0.7863, - "step": 5251 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6356211015862858e-05, - "loss": 0.7503, - "step": 5252 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6354671105795826e-05, - "loss": 0.7951, - "step": 5253 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.635313094292909e-05, - "loss": 0.731, - "step": 5254 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6351590527323923e-05, - "loss": 0.8015, - "step": 5255 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6350049859041606e-05, - "loss": 0.8431, - "step": 5256 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6348508938143422e-05, - "loss": 0.7942, - "step": 5257 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.634696776469068e-05, - "loss": 0.7697, - "step": 5258 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6345426338744687e-05, - "loss": 0.6964, - "step": 5259 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6343884660366765e-05, - "loss": 0.7924, - "step": 5260 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6342342729618242e-05, - "loss": 0.8124, - "step": 5261 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6340800546560462e-05, - "loss": 0.8875, - "step": 5262 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6339258111254772e-05, - "loss": 0.789, - "step": 5263 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6337715423762536e-05, - "loss": 0.7968, - "step": 5264 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.633617248414512e-05, - "loss": 0.8756, - "step": 5265 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6334629292463913e-05, - "loss": 0.9239, - "step": 5266 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.63330858487803e-05, - "loss": 0.9094, - "step": 5267 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.633154215315568e-05, - "loss": 0.8293, - "step": 5268 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6329998205651464e-05, - "loss": 0.8535, - "step": 5269 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6328454006329083e-05, - "loss": 0.8595, - "step": 5270 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6326909555249954e-05, - "loss": 0.8468, - "step": 5271 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.632536485247553e-05, - "loss": 0.9713, - "step": 5272 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.632381989806725e-05, - "loss": 0.8635, - "step": 5273 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6322274692086586e-05, - "loss": 0.884, - "step": 5274 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6320729234595e-05, - "loss": 0.898, - "step": 5275 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.631918352565398e-05, - "loss": 0.8801, - "step": 5276 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6317637565325014e-05, - "loss": 0.7424, - "step": 5277 - }, - { - "epoch": 0.95, - "grad_norm": 0.0, - "learning_rate": 1.6316091353669605e-05, - "loss": 0.7851, - "step": 5278 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6314544890749258e-05, - "loss": 0.7659, - "step": 5279 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.63129981766255e-05, - "loss": 0.7875, - "step": 5280 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6311451211359857e-05, - "loss": 0.6885, - "step": 5281 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6309903995013875e-05, - "loss": 0.8118, - "step": 5282 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6308356527649106e-05, - "loss": 0.7663, - "step": 5283 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.63068088093271e-05, - "loss": 0.6884, - "step": 5284 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6305260840109444e-05, - "loss": 0.7114, - "step": 5285 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.63037126200577e-05, - "loss": 0.7004, - "step": 5286 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6302164149233476e-05, - "loss": 0.7439, - "step": 5287 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6300615427698362e-05, - "loss": 0.786, - "step": 5288 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6299066455513978e-05, - "loss": 0.7651, - "step": 5289 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6297517232741937e-05, - "loss": 0.9203, - "step": 5290 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.629596775944387e-05, - "loss": 0.7596, - "step": 5291 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6294418035681422e-05, - "loss": 0.7616, - "step": 5292 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6292868061516242e-05, - "loss": 0.973, - "step": 5293 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.629131783700999e-05, - "loss": 0.8095, - "step": 5294 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6289767362224337e-05, - "loss": 0.6996, - "step": 5295 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6288216637220963e-05, - "loss": 0.8053, - "step": 5296 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6286665662061557e-05, - "loss": 0.7757, - "step": 5297 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6285114436807826e-05, - "loss": 0.9037, - "step": 5298 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.628356296152147e-05, - "loss": 0.7806, - "step": 5299 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6282011236264222e-05, - "loss": 0.8307, - "step": 5300 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.62804592610978e-05, - "loss": 0.7152, - "step": 5301 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.627890703608395e-05, - "loss": 0.7377, - "step": 5302 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6277354561284423e-05, - "loss": 0.7091, - "step": 5303 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6275801836760976e-05, - "loss": 0.7128, - "step": 5304 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.627424886257538e-05, - "loss": 0.8979, - "step": 5305 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.627269563878942e-05, - "loss": 0.7307, - "step": 5306 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.627114216546488e-05, - "loss": 0.763, - "step": 5307 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6269588442663563e-05, - "loss": 0.7387, - "step": 5308 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6268034470447273e-05, - "loss": 0.6678, - "step": 5309 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.626648024887784e-05, - "loss": 0.8635, - "step": 5310 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.626492577801708e-05, - "loss": 0.7569, - "step": 5311 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.626337105792685e-05, - "loss": 0.7374, - "step": 5312 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6261816088668982e-05, - "loss": 0.6805, - "step": 5313 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6260260870305345e-05, - "loss": 0.6995, - "step": 5314 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.625870540289781e-05, - "loss": 0.8436, - "step": 5315 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6257149686508245e-05, - "loss": 0.7619, - "step": 5316 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.625559372119855e-05, - "loss": 0.7611, - "step": 5317 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.625403750703062e-05, - "loss": 0.6763, - "step": 5318 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6252481044066366e-05, - "loss": 0.933, - "step": 5319 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.62509243323677e-05, - "loss": 0.8337, - "step": 5320 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.624936737199656e-05, - "loss": 0.8059, - "step": 5321 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.624781016301488e-05, - "loss": 0.7676, - "step": 5322 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.624625270548461e-05, - "loss": 0.8486, - "step": 5323 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6244694999467704e-05, - "loss": 0.6928, - "step": 5324 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6243137045026135e-05, - "loss": 0.9046, - "step": 5325 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.624157884222188e-05, - "loss": 0.7301, - "step": 5326 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6240020391116923e-05, - "loss": 0.8793, - "step": 5327 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6238461691773265e-05, - "loss": 0.7176, - "step": 5328 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6236902744252915e-05, - "loss": 0.6448, - "step": 5329 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.623534354861789e-05, - "loss": 0.7756, - "step": 5330 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6233784104930218e-05, - "loss": 0.8583, - "step": 5331 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.623222441325193e-05, - "loss": 0.6685, - "step": 5332 - }, - { - "epoch": 0.96, - "grad_norm": 0.0, - "learning_rate": 1.6230664473645085e-05, - "loss": 0.8119, - "step": 5333 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6229104286171732e-05, - "loss": 0.8834, - "step": 5334 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6227543850893938e-05, - "loss": 0.7661, - "step": 5335 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6225983167873782e-05, - "loss": 0.8299, - "step": 5336 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.622442223717335e-05, - "loss": 0.6548, - "step": 5337 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6222861058854736e-05, - "loss": 0.8788, - "step": 5338 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.622129963298005e-05, - "loss": 0.7785, - "step": 5339 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.621973795961141e-05, - "loss": 0.699, - "step": 5340 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6218176038810935e-05, - "loss": 0.7641, - "step": 5341 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6216613870640767e-05, - "loss": 0.761, - "step": 5342 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.621505145516305e-05, - "loss": 0.8133, - "step": 5343 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6213488792439936e-05, - "loss": 0.7541, - "step": 5344 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6211925882533597e-05, - "loss": 0.7725, - "step": 5345 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6210362725506202e-05, - "loss": 0.8008, - "step": 5346 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.620879932141994e-05, - "loss": 0.897, - "step": 5347 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6207235670337004e-05, - "loss": 0.7823, - "step": 5348 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6205671772319604e-05, - "loss": 0.7654, - "step": 5349 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6204107627429947e-05, - "loss": 0.8368, - "step": 5350 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.620254323573026e-05, - "loss": 0.8252, - "step": 5351 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6200978597282775e-05, - "loss": 0.9227, - "step": 5352 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6199413712149742e-05, - "loss": 0.7499, - "step": 5353 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.619784858039341e-05, - "loss": 0.7273, - "step": 5354 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6196283202076043e-05, - "loss": 0.923, - "step": 5355 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.619471757725992e-05, - "loss": 0.7833, - "step": 5356 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6193151706007313e-05, - "loss": 0.864, - "step": 5357 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6191585588380525e-05, - "loss": 0.7776, - "step": 5358 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6190019224441855e-05, - "loss": 0.7734, - "step": 5359 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6188452614253617e-05, - "loss": 0.8518, - "step": 5360 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6186885757878133e-05, - "loss": 0.897, - "step": 5361 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6185318655377735e-05, - "loss": 0.725, - "step": 5362 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6183751306814763e-05, - "loss": 0.8762, - "step": 5363 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.618218371225157e-05, - "loss": 0.7408, - "step": 5364 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.618061587175052e-05, - "loss": 0.8631, - "step": 5365 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6179047785373984e-05, - "loss": 0.8082, - "step": 5366 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.617747945318434e-05, - "loss": 0.7827, - "step": 5367 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6175910875243982e-05, - "loss": 0.8626, - "step": 5368 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6174342051615305e-05, - "loss": 0.9515, - "step": 5369 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.617277298236073e-05, - "loss": 0.8638, - "step": 5370 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6171203667542664e-05, - "loss": 0.8934, - "step": 5371 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6169634107223554e-05, - "loss": 0.7962, - "step": 5372 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6168064301465826e-05, - "loss": 0.9707, - "step": 5373 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.616649425033193e-05, - "loss": 0.8821, - "step": 5374 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6164923953884332e-05, - "loss": 0.901, - "step": 5375 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6163353412185497e-05, - "loss": 0.801, - "step": 5376 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6161782625297905e-05, - "loss": 0.9199, - "step": 5377 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6160211593284043e-05, - "loss": 0.7499, - "step": 5378 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6158640316206413e-05, - "loss": 0.6845, - "step": 5379 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.615706879412752e-05, - "loss": 0.9753, - "step": 5380 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6155497027109882e-05, - "loss": 0.8284, - "step": 5381 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.615392501521603e-05, - "loss": 0.8097, - "step": 5382 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6152352758508497e-05, - "loss": 0.6978, - "step": 5383 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.615078025704983e-05, - "loss": 0.7438, - "step": 5384 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6149207510902593e-05, - "loss": 0.8447, - "step": 5385 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.614763452012934e-05, - "loss": 0.7138, - "step": 5386 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.6146061284792658e-05, - "loss": 0.8367, - "step": 5387 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.614448780495513e-05, - "loss": 0.8116, - "step": 5388 - }, - { - "epoch": 0.97, - "grad_norm": 0.0, - "learning_rate": 1.614291408067935e-05, - "loss": 0.8052, - "step": 5389 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6141340112027927e-05, - "loss": 0.7182, - "step": 5390 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.613976589906347e-05, - "loss": 0.8389, - "step": 5391 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6138191441848607e-05, - "loss": 0.9523, - "step": 5392 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6136616740445975e-05, - "loss": 0.8029, - "step": 5393 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6135041794918215e-05, - "loss": 0.7883, - "step": 5394 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6133466605327987e-05, - "loss": 0.642, - "step": 5395 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6131891171737945e-05, - "loss": 0.7728, - "step": 5396 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.613031549421077e-05, - "loss": 0.6813, - "step": 5397 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.612873957280914e-05, - "loss": 0.7658, - "step": 5398 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6127163407595757e-05, - "loss": 0.6844, - "step": 5399 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.612558699863331e-05, - "loss": 0.7826, - "step": 5400 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6124010345984523e-05, - "loss": 0.8441, - "step": 5401 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6122433449712113e-05, - "loss": 0.7448, - "step": 5402 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.612085630987881e-05, - "loss": 0.7289, - "step": 5403 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.611927892654736e-05, - "loss": 0.6669, - "step": 5404 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6117701299780507e-05, - "loss": 0.8469, - "step": 5405 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6116123429641023e-05, - "loss": 0.7695, - "step": 5406 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.611454531619167e-05, - "loss": 0.7474, - "step": 5407 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6112966959495223e-05, - "loss": 0.8092, - "step": 5408 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6111388359614482e-05, - "loss": 0.8566, - "step": 5409 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6109809516612243e-05, - "loss": 0.7559, - "step": 5410 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6108230430551314e-05, - "loss": 0.8805, - "step": 5411 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6106651101494515e-05, - "loss": 0.7258, - "step": 5412 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6105071529504676e-05, - "loss": 0.7661, - "step": 5413 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.610349171464463e-05, - "loss": 0.8496, - "step": 5414 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6101911656977232e-05, - "loss": 0.8275, - "step": 5415 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.610033135656533e-05, - "loss": 0.6822, - "step": 5416 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.60987508134718e-05, - "loss": 0.7686, - "step": 5417 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6097170027759513e-05, - "loss": 0.8561, - "step": 5418 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6095588999491355e-05, - "loss": 0.9675, - "step": 5419 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.609400772873023e-05, - "loss": 0.6458, - "step": 5420 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6092426215539032e-05, - "loss": 0.7565, - "step": 5421 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6090844459980685e-05, - "loss": 0.8279, - "step": 5422 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.608926246211811e-05, - "loss": 0.7247, - "step": 5423 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6087680222014244e-05, - "loss": 0.7248, - "step": 5424 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6086097739732033e-05, - "loss": 0.773, - "step": 5425 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6084515015334423e-05, - "loss": 0.7548, - "step": 5426 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6082932048884385e-05, - "loss": 0.8016, - "step": 5427 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6081348840444887e-05, - "loss": 0.7821, - "step": 5428 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6079765390078915e-05, - "loss": 0.7625, - "step": 5429 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6078181697849462e-05, - "loss": 0.8064, - "step": 5430 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.607659776381953e-05, - "loss": 0.7952, - "step": 5431 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6075013588052126e-05, - "loss": 0.7692, - "step": 5432 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6073429170610274e-05, - "loss": 0.7759, - "step": 5433 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6071844511557005e-05, - "loss": 0.8686, - "step": 5434 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6070259610955363e-05, - "loss": 0.8674, - "step": 5435 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6068674468868393e-05, - "loss": 0.7271, - "step": 5436 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6067089085359156e-05, - "loss": 0.7783, - "step": 5437 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6065503460490724e-05, - "loss": 0.7945, - "step": 5438 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.606391759432617e-05, - "loss": 0.7149, - "step": 5439 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6062331486928587e-05, - "loss": 0.8145, - "step": 5440 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6060745138361077e-05, - "loss": 0.734, - "step": 5441 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6059158548686736e-05, - "loss": 0.825, - "step": 5442 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6057571717968688e-05, - "loss": 0.831, - "step": 5443 - }, - { - "epoch": 0.98, - "grad_norm": 0.0, - "learning_rate": 1.6055984646270065e-05, - "loss": 0.8127, - "step": 5444 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6054397333653998e-05, - "loss": 0.7489, - "step": 5445 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6052809780183628e-05, - "loss": 0.7996, - "step": 5446 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.605122198592212e-05, - "loss": 0.8211, - "step": 5447 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6049633950932634e-05, - "loss": 0.7401, - "step": 5448 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6048045675278344e-05, - "loss": 0.7712, - "step": 5449 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.604645715902244e-05, - "loss": 0.8351, - "step": 5450 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.604486840222811e-05, - "loss": 0.9276, - "step": 5451 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.604327940495856e-05, - "loss": 0.8689, - "step": 5452 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6041690167277e-05, - "loss": 0.8306, - "step": 5453 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.604010068924666e-05, - "loss": 0.7796, - "step": 5454 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6038510970930763e-05, - "loss": 0.8588, - "step": 5455 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6036921012392556e-05, - "loss": 0.8991, - "step": 5456 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.603533081369529e-05, - "loss": 0.7452, - "step": 5457 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6033740374902225e-05, - "loss": 0.9621, - "step": 5458 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6032149696076632e-05, - "loss": 0.7628, - "step": 5459 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.603055877728179e-05, - "loss": 0.8276, - "step": 5460 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.602896761858099e-05, - "loss": 0.7, - "step": 5461 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.602737622003753e-05, - "loss": 0.8811, - "step": 5462 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6025784581714718e-05, - "loss": 0.8624, - "step": 5463 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6024192703675873e-05, - "loss": 0.8332, - "step": 5464 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.602260058598432e-05, - "loss": 0.7612, - "step": 5465 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6021008228703395e-05, - "loss": 0.8362, - "step": 5466 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6019415631896457e-05, - "loss": 0.7838, - "step": 5467 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6017822795626848e-05, - "loss": 0.8339, - "step": 5468 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.601622971995794e-05, - "loss": 0.7321, - "step": 5469 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.601463640495311e-05, - "loss": 0.823, - "step": 5470 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6013042850675737e-05, - "loss": 0.8103, - "step": 5471 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.601144905718922e-05, - "loss": 0.9212, - "step": 5472 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.600985502455696e-05, - "loss": 0.7676, - "step": 5473 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6008260752842373e-05, - "loss": 0.731, - "step": 5474 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6006666242108878e-05, - "loss": 0.8458, - "step": 5475 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.600507149241991e-05, - "loss": 1.008, - "step": 5476 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.6003476503838912e-05, - "loss": 0.8191, - "step": 5477 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.600188127642934e-05, - "loss": 0.7995, - "step": 5478 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.600028581025464e-05, - "loss": 0.9464, - "step": 5479 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5998690105378294e-05, - "loss": 0.6837, - "step": 5480 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.599709416186378e-05, - "loss": 0.8724, - "step": 5481 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.599549797977459e-05, - "loss": 0.7618, - "step": 5482 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5993901559174213e-05, - "loss": 0.7749, - "step": 5483 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.599230490012617e-05, - "loss": 0.7903, - "step": 5484 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.599070800269397e-05, - "loss": 0.7095, - "step": 5485 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5989110866941142e-05, - "loss": 0.9691, - "step": 5486 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5987513492931225e-05, - "loss": 0.7068, - "step": 5487 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5985915880727762e-05, - "loss": 0.7824, - "step": 5488 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.598431803039431e-05, - "loss": 0.7265, - "step": 5489 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5982719941994442e-05, - "loss": 0.7305, - "step": 5490 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.598112161559172e-05, - "loss": 0.8703, - "step": 5491 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5979523051249737e-05, - "loss": 0.7414, - "step": 5492 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5977924249032085e-05, - "loss": 0.8032, - "step": 5493 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5976325209002357e-05, - "loss": 0.8232, - "step": 5494 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5974725931224183e-05, - "loss": 0.7264, - "step": 5495 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5973126415761175e-05, - "loss": 0.886, - "step": 5496 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.597152666267696e-05, - "loss": 0.7443, - "step": 5497 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.596992667203519e-05, - "loss": 0.7055, - "step": 5498 - }, - { - "epoch": 0.99, - "grad_norm": 0.0, - "learning_rate": 1.5968326443899507e-05, - "loss": 0.7096, - "step": 5499 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5966725978333573e-05, - "loss": 0.7727, - "step": 5500 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5965125275401063e-05, - "loss": 0.8126, - "step": 5501 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5963524335165644e-05, - "loss": 0.8238, - "step": 5502 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5961923157691016e-05, - "loss": 0.7529, - "step": 5503 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.596032174304087e-05, - "loss": 0.9409, - "step": 5504 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5958720091278915e-05, - "loss": 0.949, - "step": 5505 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5957118202468866e-05, - "loss": 0.6733, - "step": 5506 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.595551607667445e-05, - "loss": 0.7253, - "step": 5507 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.59539137139594e-05, - "loss": 0.971, - "step": 5508 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5952311114387466e-05, - "loss": 0.6312, - "step": 5509 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5950708278022398e-05, - "loss": 0.7672, - "step": 5510 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5949105204927963e-05, - "loss": 0.7767, - "step": 5511 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.594750189516793e-05, - "loss": 0.7047, - "step": 5512 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5945898348806078e-05, - "loss": 0.8808, - "step": 5513 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5944294565906208e-05, - "loss": 0.7863, - "step": 5514 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5942690546532117e-05, - "loss": 0.7583, - "step": 5515 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5941086290747617e-05, - "loss": 0.8499, - "step": 5516 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5939481798616524e-05, - "loss": 0.6948, - "step": 5517 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5937877070202672e-05, - "loss": 0.746, - "step": 5518 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5936272105569896e-05, - "loss": 0.7988, - "step": 5519 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.593466690478205e-05, - "loss": 0.8157, - "step": 5520 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5933061467902984e-05, - "loss": 0.7369, - "step": 5521 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.593145579499657e-05, - "loss": 0.8278, - "step": 5522 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5929849886126684e-05, - "loss": 0.8258, - "step": 5523 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5928243741357214e-05, - "loss": 0.7597, - "step": 5524 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5926637360752046e-05, - "loss": 0.8225, - "step": 5525 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5925030744375095e-05, - "loss": 0.7779, - "step": 5526 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.592342389229027e-05, - "loss": 0.8324, - "step": 5527 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5921816804561493e-05, - "loss": 0.6696, - "step": 5528 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.59202094812527e-05, - "loss": 0.6915, - "step": 5529 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5918601922427838e-05, - "loss": 0.6058, - "step": 5530 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5916994128150847e-05, - "loss": 0.6813, - "step": 5531 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.591538609848569e-05, - "loss": 0.6892, - "step": 5532 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5913777833496348e-05, - "loss": 0.5979, - "step": 5533 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.591216933324679e-05, - "loss": 0.7096, - "step": 5534 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5910560597801004e-05, - "loss": 0.7253, - "step": 5535 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5908951627222994e-05, - "loss": 0.6547, - "step": 5536 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5907342421576766e-05, - "loss": 0.6698, - "step": 5537 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.590573298092634e-05, - "loss": 0.6607, - "step": 5538 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5904123305335735e-05, - "loss": 0.5951, - "step": 5539 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.590251339486899e-05, - "loss": 0.6759, - "step": 5540 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5900903249590154e-05, - "loss": 0.8402, - "step": 5541 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5899292869563275e-05, - "loss": 0.6355, - "step": 5542 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.589768225485242e-05, - "loss": 0.6388, - "step": 5543 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.589607140552166e-05, - "loss": 0.6015, - "step": 5544 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5894460321635085e-05, - "loss": 0.6517, - "step": 5545 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.589284900325678e-05, - "loss": 0.7619, - "step": 5546 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.589123745045084e-05, - "loss": 0.692, - "step": 5547 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5889625663281383e-05, - "loss": 0.6767, - "step": 5548 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.588801364181253e-05, - "loss": 0.7481, - "step": 5549 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.588640138610841e-05, - "loss": 0.7503, - "step": 5550 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.588478889623316e-05, - "loss": 0.6212, - "step": 5551 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.588317617225092e-05, - "loss": 0.6995, - "step": 5552 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5881563214225857e-05, - "loss": 0.5972, - "step": 5553 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 1.5879950022222133e-05, - "loss": 0.6683, - "step": 5554 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5878336596303926e-05, - "loss": 0.7291, - "step": 5555 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5876722936535416e-05, - "loss": 0.6115, - "step": 5556 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.58751090429808e-05, - "loss": 0.6436, - "step": 5557 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5873494915704287e-05, - "loss": 0.5953, - "step": 5558 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.587188055477008e-05, - "loss": 0.7515, - "step": 5559 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5870265960242404e-05, - "loss": 0.6064, - "step": 5560 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5868651132185497e-05, - "loss": 0.6625, - "step": 5561 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.586703607066359e-05, - "loss": 0.6845, - "step": 5562 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5865420775740936e-05, - "loss": 0.6323, - "step": 5563 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.58638052474818e-05, - "loss": 0.7635, - "step": 5564 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5862189485950444e-05, - "loss": 0.7363, - "step": 5565 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5860573491211148e-05, - "loss": 0.6525, - "step": 5566 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.58589572633282e-05, - "loss": 0.6587, - "step": 5567 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5857340802365892e-05, - "loss": 0.6466, - "step": 5568 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.585572410838854e-05, - "loss": 0.6856, - "step": 5569 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.585410718146044e-05, - "loss": 0.7328, - "step": 5570 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5852490021645938e-05, - "loss": 0.5434, - "step": 5571 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.585087262900935e-05, - "loss": 0.6464, - "step": 5572 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5849255003615033e-05, - "loss": 0.6359, - "step": 5573 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5847637145527323e-05, - "loss": 0.6732, - "step": 5574 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5846019054810596e-05, - "loss": 0.7032, - "step": 5575 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5844400731529214e-05, - "loss": 0.6641, - "step": 5576 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.584278217574756e-05, - "loss": 0.7642, - "step": 5577 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.584116338753002e-05, - "loss": 0.5959, - "step": 5578 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5839544366940995e-05, - "loss": 0.6929, - "step": 5579 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.583792511404489e-05, - "loss": 0.5972, - "step": 5580 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5836305628906124e-05, - "loss": 0.6146, - "step": 5581 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.583468591158912e-05, - "loss": 0.5508, - "step": 5582 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.583306596215832e-05, - "loss": 0.6407, - "step": 5583 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5831445780678164e-05, - "loss": 0.721, - "step": 5584 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.58298253672131e-05, - "loss": 0.6755, - "step": 5585 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5828204721827596e-05, - "loss": 0.5617, - "step": 5586 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5826583844586125e-05, - "loss": 0.6966, - "step": 5587 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.582496273555317e-05, - "loss": 0.7272, - "step": 5588 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5823341394793214e-05, - "loss": 0.7964, - "step": 5589 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5821719822370763e-05, - "loss": 0.6911, - "step": 5590 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5820098018350324e-05, - "loss": 0.604, - "step": 5591 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.581847598279642e-05, - "loss": 0.6016, - "step": 5592 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.581685371577357e-05, - "loss": 0.6009, - "step": 5593 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5815231217346315e-05, - "loss": 0.6505, - "step": 5594 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5813608487579197e-05, - "loss": 0.5872, - "step": 5595 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.581198552653678e-05, - "loss": 0.6805, - "step": 5596 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.581036233428362e-05, - "loss": 0.6557, - "step": 5597 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.580873891088429e-05, - "loss": 0.7077, - "step": 5598 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.580711525640338e-05, - "loss": 0.7127, - "step": 5599 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.580549137090547e-05, - "loss": 0.7019, - "step": 5600 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5803867254455175e-05, - "loss": 0.5584, - "step": 5601 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5802242907117095e-05, - "loss": 0.5636, - "step": 5602 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.580061832895585e-05, - "loss": 0.7399, - "step": 5603 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5798993520036074e-05, - "loss": 0.6801, - "step": 5604 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.57973684804224e-05, - "loss": 0.5377, - "step": 5605 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.579574321017948e-05, - "loss": 0.5325, - "step": 5606 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.579411770937196e-05, - "loss": 0.5812, - "step": 5607 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5792491978064516e-05, - "loss": 0.7423, - "step": 5608 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.579086601632182e-05, - "loss": 0.6833, - "step": 5609 - }, - { - "epoch": 1.01, - "grad_norm": 0.0, - "learning_rate": 1.5789239824208548e-05, - "loss": 0.6893, - "step": 5610 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5787613401789404e-05, - "loss": 0.8077, - "step": 5611 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5785986749129077e-05, - "loss": 0.6433, - "step": 5612 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.578435986629229e-05, - "loss": 0.6306, - "step": 5613 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.578273275334376e-05, - "loss": 0.6581, - "step": 5614 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5781105410348212e-05, - "loss": 0.6389, - "step": 5615 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5779477837370386e-05, - "loss": 0.6594, - "step": 5616 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5777850034475034e-05, - "loss": 0.5911, - "step": 5617 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5776222001726907e-05, - "loss": 0.6248, - "step": 5618 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5774593739190776e-05, - "loss": 0.6701, - "step": 5619 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5772965246931413e-05, - "loss": 0.5072, - "step": 5620 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.57713365250136e-05, - "loss": 0.6407, - "step": 5621 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5769707573502134e-05, - "loss": 0.6884, - "step": 5622 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5768078392461822e-05, - "loss": 0.6472, - "step": 5623 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5766448981957462e-05, - "loss": 0.5944, - "step": 5624 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.576481934205389e-05, - "loss": 0.6139, - "step": 5625 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5763189472815924e-05, - "loss": 0.5733, - "step": 5626 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.576155937430841e-05, - "loss": 0.5754, - "step": 5627 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5759929046596198e-05, - "loss": 0.5713, - "step": 5628 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5758298489744136e-05, - "loss": 0.6183, - "step": 5629 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5756667703817095e-05, - "loss": 0.6284, - "step": 5630 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5755036688879952e-05, - "loss": 0.7033, - "step": 5631 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5753405444997593e-05, - "loss": 0.5836, - "step": 5632 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5751773972234907e-05, - "loss": 0.5203, - "step": 5633 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.57501422706568e-05, - "loss": 0.5215, - "step": 5634 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5748510340328183e-05, - "loss": 0.582, - "step": 5635 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5746878181313975e-05, - "loss": 0.7273, - "step": 5636 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.574524579367911e-05, - "loss": 0.6924, - "step": 5637 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.574361317748853e-05, - "loss": 0.5686, - "step": 5638 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.574198033280717e-05, - "loss": 0.6781, - "step": 5639 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5740347259699998e-05, - "loss": 0.6581, - "step": 5640 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.573871395823198e-05, - "loss": 0.6601, - "step": 5641 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5737080428468087e-05, - "loss": 0.6212, - "step": 5642 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5735446670473308e-05, - "loss": 0.6136, - "step": 5643 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.573381268431264e-05, - "loss": 0.6448, - "step": 5644 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5732178470051076e-05, - "loss": 0.6247, - "step": 5645 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5730544027753632e-05, - "loss": 0.6209, - "step": 5646 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.572890935748533e-05, - "loss": 0.6971, - "step": 5647 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.57272744593112e-05, - "loss": 0.6509, - "step": 5648 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.572563933329628e-05, - "loss": 0.6552, - "step": 5649 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.572400397950562e-05, - "loss": 0.615, - "step": 5650 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5722368398004278e-05, - "loss": 0.6555, - "step": 5651 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5720732588857316e-05, - "loss": 0.6281, - "step": 5652 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.571909655212981e-05, - "loss": 0.5746, - "step": 5653 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5717460287886845e-05, - "loss": 0.7013, - "step": 5654 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5715823796193517e-05, - "loss": 0.6338, - "step": 5655 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.571418707711493e-05, - "loss": 0.5453, - "step": 5656 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.571255013071619e-05, - "loss": 0.5801, - "step": 5657 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5710912957062417e-05, - "loss": 0.8431, - "step": 5658 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5709275556218744e-05, - "loss": 0.694, - "step": 5659 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.570763792825031e-05, - "loss": 0.6782, - "step": 5660 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5706000073222263e-05, - "loss": 0.6603, - "step": 5661 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5704361991199756e-05, - "loss": 0.5826, - "step": 5662 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.5702723682247957e-05, - "loss": 0.5963, - "step": 5663 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.570108514643204e-05, - "loss": 0.6165, - "step": 5664 - }, - { - "epoch": 1.02, - "grad_norm": 0.0, - "learning_rate": 1.569944638381719e-05, - "loss": 0.6247, - "step": 5665 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.56978073944686e-05, - "loss": 0.7168, - "step": 5666 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.569616817845147e-05, - "loss": 0.6832, - "step": 5667 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5694528735831013e-05, - "loss": 0.6858, - "step": 5668 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5692889066672447e-05, - "loss": 0.5576, - "step": 5669 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5691249171040998e-05, - "loss": 0.5929, - "step": 5670 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5689609049001914e-05, - "loss": 0.745, - "step": 5671 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.568796870062043e-05, - "loss": 0.6847, - "step": 5672 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5686328125961808e-05, - "loss": 0.6028, - "step": 5673 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5684687325091313e-05, - "loss": 0.5904, - "step": 5674 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.568304629807422e-05, - "loss": 0.6213, - "step": 5675 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.568140504497581e-05, - "loss": 0.6275, - "step": 5676 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5679763565861367e-05, - "loss": 0.6632, - "step": 5677 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.56781218607962e-05, - "loss": 0.639, - "step": 5678 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5676479929845628e-05, - "loss": 0.6409, - "step": 5679 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5674837773074956e-05, - "loss": 0.7301, - "step": 5680 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.567319539054951e-05, - "loss": 0.6367, - "step": 5681 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.567155278233464e-05, - "loss": 0.6125, - "step": 5682 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5669909948495682e-05, - "loss": 0.5189, - "step": 5683 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.566826688909799e-05, - "loss": 0.6163, - "step": 5684 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5666623604206934e-05, - "loss": 0.6117, - "step": 5685 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.566498009388788e-05, - "loss": 0.6106, - "step": 5686 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5663336358206217e-05, - "loss": 0.6441, - "step": 5687 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5661692397227333e-05, - "loss": 0.6491, - "step": 5688 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5660048211016623e-05, - "loss": 0.6499, - "step": 5689 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5658403799639503e-05, - "loss": 0.6257, - "step": 5690 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5656759163161385e-05, - "loss": 0.6251, - "step": 5691 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5655114301647694e-05, - "loss": 0.6551, - "step": 5692 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.565346921516387e-05, - "loss": 0.5852, - "step": 5693 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5651823903775353e-05, - "loss": 0.6095, - "step": 5694 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5650178367547604e-05, - "loss": 0.6689, - "step": 5695 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5648532606546075e-05, - "loss": 0.6753, - "step": 5696 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5646886620836247e-05, - "loss": 0.6193, - "step": 5697 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5645240410483594e-05, - "loss": 0.6075, - "step": 5698 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5643593975553607e-05, - "loss": 0.5713, - "step": 5699 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5641947316111787e-05, - "loss": 0.614, - "step": 5700 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.564030043222363e-05, - "loss": 0.6508, - "step": 5701 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5638653323954664e-05, - "loss": 0.5947, - "step": 5702 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5637005991370407e-05, - "loss": 0.7159, - "step": 5703 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5635358434536397e-05, - "loss": 0.7059, - "step": 5704 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5633710653518174e-05, - "loss": 0.7629, - "step": 5705 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.563206264838129e-05, - "loss": 0.6667, - "step": 5706 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5630414419191307e-05, - "loss": 0.5984, - "step": 5707 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.562876596601379e-05, - "loss": 0.6244, - "step": 5708 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5627117288914324e-05, - "loss": 0.6404, - "step": 5709 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5625468387958492e-05, - "loss": 0.6751, - "step": 5710 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5623819263211886e-05, - "loss": 0.6055, - "step": 5711 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.562216991474012e-05, - "loss": 0.674, - "step": 5712 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.56205203426088e-05, - "loss": 0.6752, - "step": 5713 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5618870546883555e-05, - "loss": 0.6739, - "step": 5714 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.561722052763001e-05, - "loss": 0.7326, - "step": 5715 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5615570284913814e-05, - "loss": 0.6499, - "step": 5716 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5613919818800612e-05, - "loss": 0.7254, - "step": 5717 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5612269129356064e-05, - "loss": 0.7122, - "step": 5718 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5610618216645832e-05, - "loss": 0.7065, - "step": 5719 - }, - { - "epoch": 1.03, - "grad_norm": 0.0, - "learning_rate": 1.5608967080735595e-05, - "loss": 0.6028, - "step": 5720 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5607315721691044e-05, - "loss": 0.6152, - "step": 5721 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5605664139577864e-05, - "loss": 0.6196, - "step": 5722 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.560401233446176e-05, - "loss": 0.6892, - "step": 5723 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5602360306408448e-05, - "loss": 0.5774, - "step": 5724 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5600708055483643e-05, - "loss": 0.626, - "step": 5725 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.559905558175308e-05, - "loss": 0.6333, - "step": 5726 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.559740288528249e-05, - "loss": 0.5681, - "step": 5727 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5595749966137632e-05, - "loss": 0.632, - "step": 5728 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5594096824384248e-05, - "loss": 0.6018, - "step": 5729 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.559244346008811e-05, - "loss": 0.6633, - "step": 5730 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5590789873314994e-05, - "loss": 0.5929, - "step": 5731 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5589136064130675e-05, - "loss": 0.6202, - "step": 5732 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.558748203260095e-05, - "loss": 0.5754, - "step": 5733 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.558582777879162e-05, - "loss": 0.6076, - "step": 5734 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.558417330276849e-05, - "loss": 0.5737, - "step": 5735 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5582518604597377e-05, - "loss": 0.6495, - "step": 5736 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5580863684344116e-05, - "loss": 0.7725, - "step": 5737 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5579208542074534e-05, - "loss": 0.6089, - "step": 5738 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5577553177854482e-05, - "loss": 0.608, - "step": 5739 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5575897591749803e-05, - "loss": 0.7113, - "step": 5740 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.557424178382637e-05, - "loss": 0.6082, - "step": 5741 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5572585754150048e-05, - "loss": 0.6167, - "step": 5742 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5570929502786722e-05, - "loss": 0.6131, - "step": 5743 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.556927302980227e-05, - "loss": 0.6896, - "step": 5744 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5567616335262603e-05, - "loss": 0.7988, - "step": 5745 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.556595941923362e-05, - "loss": 0.6638, - "step": 5746 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5564302281781232e-05, - "loss": 0.5992, - "step": 5747 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.556264492297137e-05, - "loss": 0.8191, - "step": 5748 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5560987342869962e-05, - "loss": 0.6782, - "step": 5749 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.555932954154295e-05, - "loss": 0.5072, - "step": 5750 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.555767151905629e-05, - "loss": 0.6106, - "step": 5751 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.555601327547593e-05, - "loss": 0.635, - "step": 5752 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5554354810867848e-05, - "loss": 0.6016, - "step": 5753 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5552696125298016e-05, - "loss": 0.5406, - "step": 5754 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.555103721883242e-05, - "loss": 0.6949, - "step": 5755 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5549378091537052e-05, - "loss": 0.6622, - "step": 5756 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.554771874347792e-05, - "loss": 0.6844, - "step": 5757 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.554605917472103e-05, - "loss": 0.5933, - "step": 5758 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.554439938533241e-05, - "loss": 0.6255, - "step": 5759 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5542739375378076e-05, - "loss": 0.6053, - "step": 5760 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.554107914492408e-05, - "loss": 0.6356, - "step": 5761 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.553941869403646e-05, - "loss": 0.6225, - "step": 5762 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.553775802278128e-05, - "loss": 0.5838, - "step": 5763 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5536097131224594e-05, - "loss": 0.6756, - "step": 5764 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.553443601943248e-05, - "loss": 0.757, - "step": 5765 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5532774687471026e-05, - "loss": 0.6452, - "step": 5766 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5531113135406314e-05, - "loss": 0.762, - "step": 5767 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5529451363304442e-05, - "loss": 0.7593, - "step": 5768 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5527789371231526e-05, - "loss": 0.654, - "step": 5769 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5526127159253677e-05, - "loss": 0.6969, - "step": 5770 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5524464727437026e-05, - "loss": 0.7127, - "step": 5771 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5522802075847706e-05, - "loss": 0.6909, - "step": 5772 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5521139204551853e-05, - "loss": 0.6268, - "step": 5773 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5519476113615626e-05, - "loss": 0.6158, - "step": 5774 - }, - { - "epoch": 1.04, - "grad_norm": 0.0, - "learning_rate": 1.5517812803105184e-05, - "loss": 0.6722, - "step": 5775 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5516149273086695e-05, - "loss": 0.6601, - "step": 5776 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.551448552362634e-05, - "loss": 0.6471, - "step": 5777 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5512821554790306e-05, - "loss": 0.5868, - "step": 5778 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.551115736664478e-05, - "loss": 0.7239, - "step": 5779 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.550949295925598e-05, - "loss": 0.6413, - "step": 5780 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.550782833269011e-05, - "loss": 0.7011, - "step": 5781 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.550616348701339e-05, - "loss": 0.6932, - "step": 5782 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5504498422292055e-05, - "loss": 0.632, - "step": 5783 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5502833138592344e-05, - "loss": 0.7439, - "step": 5784 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.550116763598051e-05, - "loss": 0.6711, - "step": 5785 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5499501914522793e-05, - "loss": 0.6923, - "step": 5786 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5497835974285473e-05, - "loss": 0.7158, - "step": 5787 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.549616981533482e-05, - "loss": 0.7489, - "step": 5788 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5494503437737117e-05, - "loss": 0.7342, - "step": 5789 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.549283684155865e-05, - "loss": 0.5488, - "step": 5790 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.549117002686573e-05, - "loss": 0.6704, - "step": 5791 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5489502993724658e-05, - "loss": 0.6729, - "step": 5792 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5487835742201748e-05, - "loss": 0.6108, - "step": 5793 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.548616827236334e-05, - "loss": 0.7149, - "step": 5794 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5484500584275752e-05, - "loss": 0.6091, - "step": 5795 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.548283267800534e-05, - "loss": 0.6687, - "step": 5796 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5481164553618446e-05, - "loss": 0.729, - "step": 5797 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5479496211181437e-05, - "loss": 0.646, - "step": 5798 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5477827650760685e-05, - "loss": 0.6476, - "step": 5799 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5476158872422568e-05, - "loss": 0.6477, - "step": 5800 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5474489876233465e-05, - "loss": 0.6582, - "step": 5801 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5472820662259774e-05, - "loss": 0.6094, - "step": 5802 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5471151230567906e-05, - "loss": 0.7876, - "step": 5803 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5469481581224274e-05, - "loss": 0.5788, - "step": 5804 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5467811714295286e-05, - "loss": 0.5813, - "step": 5805 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5466141629847384e-05, - "loss": 0.682, - "step": 5806 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5464471327947005e-05, - "loss": 0.774, - "step": 5807 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5462800808660597e-05, - "loss": 0.7095, - "step": 5808 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.546113007205461e-05, - "loss": 0.6798, - "step": 5809 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5459459118195517e-05, - "loss": 0.5548, - "step": 5810 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5457787947149786e-05, - "loss": 0.6018, - "step": 5811 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5456116558983903e-05, - "loss": 0.6432, - "step": 5812 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5454444953764353e-05, - "loss": 0.6545, - "step": 5813 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5452773131557642e-05, - "loss": 0.5475, - "step": 5814 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5451101092430273e-05, - "loss": 0.6252, - "step": 5815 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5449428836448764e-05, - "loss": 0.5901, - "step": 5816 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.544775636367964e-05, - "loss": 0.6981, - "step": 5817 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5446083674189435e-05, - "loss": 0.6969, - "step": 5818 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5444410768044694e-05, - "loss": 0.6568, - "step": 5819 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5442737645311968e-05, - "loss": 0.6712, - "step": 5820 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.544106430605781e-05, - "loss": 0.6913, - "step": 5821 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5439390750348794e-05, - "loss": 0.6223, - "step": 5822 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5437716978251497e-05, - "loss": 0.5697, - "step": 5823 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.54360429898325e-05, - "loss": 0.7126, - "step": 5824 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5434368785158405e-05, - "loss": 0.5826, - "step": 5825 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5432694364295805e-05, - "loss": 0.6064, - "step": 5826 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5431019727311322e-05, - "loss": 0.7184, - "step": 5827 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5429344874271567e-05, - "loss": 0.6235, - "step": 5828 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5427669805243174e-05, - "loss": 0.6466, - "step": 5829 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5425994520292778e-05, - "loss": 0.6696, - "step": 5830 - }, - { - "epoch": 1.05, - "grad_norm": 0.0, - "learning_rate": 1.5424319019487022e-05, - "loss": 0.565, - "step": 5831 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5422643302892563e-05, - "loss": 0.6332, - "step": 5832 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5420967370576064e-05, - "loss": 0.757, - "step": 5833 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.54192912226042e-05, - "loss": 0.5596, - "step": 5834 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5417614859043645e-05, - "loss": 0.6394, - "step": 5835 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5415938279961092e-05, - "loss": 0.5438, - "step": 5836 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5414261485423234e-05, - "loss": 0.617, - "step": 5837 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.541258447549678e-05, - "loss": 0.6445, - "step": 5838 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5410907250248445e-05, - "loss": 0.7699, - "step": 5839 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5409229809744945e-05, - "loss": 0.7096, - "step": 5840 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.540755215405302e-05, - "loss": 0.626, - "step": 5841 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5405874283239405e-05, - "loss": 0.5855, - "step": 5842 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5404196197370853e-05, - "loss": 0.5843, - "step": 5843 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5402517896514115e-05, - "loss": 0.6718, - "step": 5844 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5400839380735963e-05, - "loss": 0.6433, - "step": 5845 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5399160650103168e-05, - "loss": 0.7139, - "step": 5846 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5397481704682512e-05, - "loss": 0.4976, - "step": 5847 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5395802544540786e-05, - "loss": 0.7225, - "step": 5848 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.539412316974479e-05, - "loss": 0.6651, - "step": 5849 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.539244358036134e-05, - "loss": 0.6427, - "step": 5850 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.539076377645724e-05, - "loss": 0.7028, - "step": 5851 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5389083758099328e-05, - "loss": 0.6402, - "step": 5852 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.538740352535443e-05, - "loss": 0.611, - "step": 5853 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.538572307828939e-05, - "loss": 0.7042, - "step": 5854 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.538404241697106e-05, - "loss": 0.7012, - "step": 5855 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5382361541466296e-05, - "loss": 0.5506, - "step": 5856 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5380680451841972e-05, - "loss": 0.5959, - "step": 5857 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5378999148164965e-05, - "loss": 0.7297, - "step": 5858 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5377317630502157e-05, - "loss": 0.6435, - "step": 5859 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5375635898920432e-05, - "loss": 0.6762, - "step": 5860 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5373953953486713e-05, - "loss": 0.6075, - "step": 5861 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5372271794267896e-05, - "loss": 0.738, - "step": 5862 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5370589421330903e-05, - "loss": 0.5858, - "step": 5863 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5368906834742666e-05, - "loss": 0.6536, - "step": 5864 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.536722403457011e-05, - "loss": 0.5775, - "step": 5865 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5365541020880195e-05, - "loss": 0.5198, - "step": 5866 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5363857793739866e-05, - "loss": 0.712, - "step": 5867 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.536217435321608e-05, - "loss": 0.5692, - "step": 5868 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5360490699375815e-05, - "loss": 0.7279, - "step": 5869 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.535880683228605e-05, - "loss": 0.5364, - "step": 5870 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5357122752013763e-05, - "loss": 0.6127, - "step": 5871 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.535543845862596e-05, - "loss": 0.6224, - "step": 5872 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5353753952189638e-05, - "loss": 0.7007, - "step": 5873 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5352069232771817e-05, - "loss": 0.6241, - "step": 5874 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.535038430043951e-05, - "loss": 0.6875, - "step": 5875 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5348699155259748e-05, - "loss": 0.6819, - "step": 5876 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5347013797299573e-05, - "loss": 0.708, - "step": 5877 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.534532822662603e-05, - "loss": 0.6597, - "step": 5878 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.534364244330617e-05, - "loss": 0.5812, - "step": 5879 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.534195644740706e-05, - "loss": 0.5496, - "step": 5880 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5340270238995775e-05, - "loss": 0.6669, - "step": 5881 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.533858381813939e-05, - "loss": 0.6562, - "step": 5882 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5336897184904996e-05, - "loss": 0.7009, - "step": 5883 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5335210339359684e-05, - "loss": 0.7178, - "step": 5884 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.533352328157057e-05, - "loss": 0.5845, - "step": 5885 - }, - { - "epoch": 1.06, - "grad_norm": 0.0, - "learning_rate": 1.5331836011604764e-05, - "loss": 0.6238, - "step": 5886 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.533014852952938e-05, - "loss": 0.6533, - "step": 5887 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5328460835411565e-05, - "loss": 0.7014, - "step": 5888 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5326772929318448e-05, - "loss": 0.6377, - "step": 5889 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5325084811317176e-05, - "loss": 0.6685, - "step": 5890 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5323396481474912e-05, - "loss": 0.6579, - "step": 5891 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.532170793985881e-05, - "loss": 0.6494, - "step": 5892 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5320019186536056e-05, - "loss": 0.6076, - "step": 5893 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.531833022157382e-05, - "loss": 0.6491, - "step": 5894 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5316641045039297e-05, - "loss": 0.6908, - "step": 5895 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.531495165699969e-05, - "loss": 0.7671, - "step": 5896 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5313262057522198e-05, - "loss": 0.6526, - "step": 5897 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.531157224667404e-05, - "loss": 0.6057, - "step": 5898 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.530988222452244e-05, - "loss": 0.6544, - "step": 5899 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5308191991134623e-05, - "loss": 0.6517, - "step": 5900 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.530650154657784e-05, - "loss": 0.6441, - "step": 5901 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.530481089091934e-05, - "loss": 0.7131, - "step": 5902 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5303120024226366e-05, - "loss": 0.6068, - "step": 5903 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5301428946566195e-05, - "loss": 0.7178, - "step": 5904 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5299737658006103e-05, - "loss": 0.6564, - "step": 5905 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.529804615861336e-05, - "loss": 0.6616, - "step": 5906 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5296354448455274e-05, - "loss": 0.6321, - "step": 5907 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.529466252759913e-05, - "loss": 0.7946, - "step": 5908 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.529297039611224e-05, - "loss": 0.6187, - "step": 5909 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5291278054061922e-05, - "loss": 0.5873, - "step": 5910 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5289585501515495e-05, - "loss": 0.6013, - "step": 5911 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.52878927385403e-05, - "loss": 0.5951, - "step": 5912 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.528619976520367e-05, - "loss": 0.706, - "step": 5913 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.528450658157296e-05, - "loss": 0.6034, - "step": 5914 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5282813187715523e-05, - "loss": 0.5979, - "step": 5915 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.528111958369873e-05, - "loss": 0.6637, - "step": 5916 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.527942576958995e-05, - "loss": 0.6765, - "step": 5917 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.527773174545657e-05, - "loss": 0.6572, - "step": 5918 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.527603751136598e-05, - "loss": 0.7135, - "step": 5919 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.527434306738558e-05, - "loss": 0.6426, - "step": 5920 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5272648413582778e-05, - "loss": 0.7065, - "step": 5921 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5270953550024986e-05, - "loss": 0.599, - "step": 5922 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5269258476779635e-05, - "loss": 0.6278, - "step": 5923 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5267563193914154e-05, - "loss": 0.6119, - "step": 5924 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5265867701495986e-05, - "loss": 0.6523, - "step": 5925 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.526417199959258e-05, - "loss": 0.6441, - "step": 5926 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5262476088271393e-05, - "loss": 0.6617, - "step": 5927 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5260779967599892e-05, - "loss": 0.6122, - "step": 5928 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5259083637645557e-05, - "loss": 0.7223, - "step": 5929 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5257387098475857e-05, - "loss": 0.6591, - "step": 5930 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5255690350158297e-05, - "loss": 0.6373, - "step": 5931 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.525399339276037e-05, - "loss": 0.629, - "step": 5932 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5252296226349582e-05, - "loss": 0.677, - "step": 5933 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5250598850993459e-05, - "loss": 0.7551, - "step": 5934 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5248901266759517e-05, - "loss": 0.6768, - "step": 5935 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5247203473715289e-05, - "loss": 0.5504, - "step": 5936 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5245505471928317e-05, - "loss": 0.6191, - "step": 5937 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5243807261466152e-05, - "loss": 0.6881, - "step": 5938 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5242108842396351e-05, - "loss": 0.6694, - "step": 5939 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5240410214786483e-05, - "loss": 0.5316, - "step": 5940 - }, - { - "epoch": 1.07, - "grad_norm": 0.0, - "learning_rate": 1.5238711378704115e-05, - "loss": 0.5842, - "step": 5941 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5237012334216837e-05, - "loss": 0.7784, - "step": 5942 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5235313081392236e-05, - "loss": 0.5658, - "step": 5943 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.523361362029791e-05, - "loss": 0.6683, - "step": 5944 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.523191395100147e-05, - "loss": 0.7099, - "step": 5945 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5230214073570531e-05, - "loss": 0.5736, - "step": 5946 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5228513988072716e-05, - "loss": 0.7325, - "step": 5947 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.522681369457566e-05, - "loss": 0.6316, - "step": 5948 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5225113193146997e-05, - "loss": 0.7201, - "step": 5949 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5223412483854381e-05, - "loss": 0.5462, - "step": 5950 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.522171156676547e-05, - "loss": 0.5835, - "step": 5951 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5220010441947925e-05, - "loss": 0.5543, - "step": 5952 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5218309109469424e-05, - "loss": 0.7107, - "step": 5953 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5216607569397646e-05, - "loss": 0.7861, - "step": 5954 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5214905821800286e-05, - "loss": 0.5808, - "step": 5955 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5213203866745034e-05, - "loss": 0.6547, - "step": 5956 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5211501704299603e-05, - "loss": 0.6207, - "step": 5957 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5209799334531707e-05, - "loss": 0.6356, - "step": 5958 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5208096757509068e-05, - "loss": 0.6249, - "step": 5959 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.520639397329942e-05, - "loss": 0.6956, - "step": 5960 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.52046909819705e-05, - "loss": 0.4986, - "step": 5961 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5202987783590055e-05, - "loss": 0.6271, - "step": 5962 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5201284378225843e-05, - "loss": 0.6009, - "step": 5963 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5199580765945629e-05, - "loss": 0.7308, - "step": 5964 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5197876946817182e-05, - "loss": 0.5387, - "step": 5965 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.519617292090829e-05, - "loss": 0.7157, - "step": 5966 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.519446868828674e-05, - "loss": 0.612, - "step": 5967 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.519276424902032e-05, - "loss": 0.6212, - "step": 5968 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5191059603176843e-05, - "loss": 0.6671, - "step": 5969 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5189354750824126e-05, - "loss": 0.6584, - "step": 5970 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5187649692029984e-05, - "loss": 0.5519, - "step": 5971 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5185944426862254e-05, - "loss": 0.6712, - "step": 5972 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5184238955388766e-05, - "loss": 0.623, - "step": 5973 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5182533277677376e-05, - "loss": 0.5525, - "step": 5974 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5180827393795932e-05, - "loss": 0.5658, - "step": 5975 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5179121303812296e-05, - "loss": 0.5979, - "step": 5976 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5177415007794345e-05, - "loss": 0.594, - "step": 5977 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5175708505809956e-05, - "loss": 0.7165, - "step": 5978 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5174001797927015e-05, - "loss": 0.6655, - "step": 5979 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.517229488421342e-05, - "loss": 0.5978, - "step": 5980 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5170587764737077e-05, - "loss": 0.6361, - "step": 5981 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.516888043956589e-05, - "loss": 0.5941, - "step": 5982 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5167172908767786e-05, - "loss": 0.713, - "step": 5983 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5165465172410692e-05, - "loss": 0.6474, - "step": 5984 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5163757230562542e-05, - "loss": 0.6604, - "step": 5985 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5162049083291288e-05, - "loss": 0.7208, - "step": 5986 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5160340730664875e-05, - "loss": 0.6324, - "step": 5987 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5158632172751268e-05, - "loss": 0.6273, - "step": 5988 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5156923409618438e-05, - "loss": 0.663, - "step": 5989 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5155214441334359e-05, - "loss": 0.6812, - "step": 5990 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5153505267967016e-05, - "loss": 0.744, - "step": 5991 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5151795889584407e-05, - "loss": 0.6954, - "step": 5992 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5150086306254534e-05, - "loss": 0.632, - "step": 5993 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5148376518045405e-05, - "loss": 0.6755, - "step": 5994 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5146666525025033e-05, - "loss": 0.7142, - "step": 5995 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5144956327261455e-05, - "loss": 0.6671, - "step": 5996 - }, - { - "epoch": 1.08, - "grad_norm": 0.0, - "learning_rate": 1.5143245924822699e-05, - "loss": 0.6473, - "step": 5997 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.514153531777681e-05, - "loss": 0.6497, - "step": 5998 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5139824506191839e-05, - "loss": 0.6744, - "step": 5999 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5138113490135839e-05, - "loss": 0.6497, - "step": 6000 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5136402269676888e-05, - "loss": 0.7102, - "step": 6001 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5134690844883056e-05, - "loss": 0.5979, - "step": 6002 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.513297921582242e-05, - "loss": 0.6599, - "step": 6003 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5131267382563085e-05, - "loss": 0.6739, - "step": 6004 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.512955534517314e-05, - "loss": 0.6311, - "step": 6005 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5127843103720689e-05, - "loss": 0.6358, - "step": 6006 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5126130658273863e-05, - "loss": 0.6552, - "step": 6007 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5124418008900776e-05, - "loss": 0.7575, - "step": 6008 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5122705155669564e-05, - "loss": 0.6127, - "step": 6009 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5120992098648362e-05, - "loss": 0.7113, - "step": 6010 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.511927883790532e-05, - "loss": 0.5771, - "step": 6011 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.51175653735086e-05, - "loss": 0.5673, - "step": 6012 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5115851705526362e-05, - "loss": 0.6669, - "step": 6013 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5114137834026776e-05, - "loss": 0.6092, - "step": 6014 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.511242375907803e-05, - "loss": 0.6272, - "step": 6015 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5110709480748308e-05, - "loss": 0.6037, - "step": 6016 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5108994999105806e-05, - "loss": 0.5464, - "step": 6017 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.510728031421873e-05, - "loss": 0.5993, - "step": 6018 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5105565426155297e-05, - "loss": 0.7171, - "step": 6019 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5103850334983725e-05, - "loss": 0.6274, - "step": 6020 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5102135040772244e-05, - "loss": 0.7416, - "step": 6021 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5100419543589085e-05, - "loss": 0.6717, - "step": 6022 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5098703843502505e-05, - "loss": 0.7043, - "step": 6023 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5096987940580747e-05, - "loss": 0.575, - "step": 6024 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5095271834892078e-05, - "loss": 0.6561, - "step": 6025 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5093555526504768e-05, - "loss": 0.7636, - "step": 6026 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5091839015487093e-05, - "loss": 0.6788, - "step": 6027 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.509012230190734e-05, - "loss": 0.5983, - "step": 6028 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5088405385833804e-05, - "loss": 0.6646, - "step": 6029 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5086688267334779e-05, - "loss": 0.6516, - "step": 6030 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5084970946478588e-05, - "loss": 0.6989, - "step": 6031 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5083253423333535e-05, - "loss": 0.6398, - "step": 6032 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5081535697967954e-05, - "loss": 0.611, - "step": 6033 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.507981777045018e-05, - "loss": 0.712, - "step": 6034 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5078099640848551e-05, - "loss": 0.7548, - "step": 6035 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5076381309231422e-05, - "loss": 0.7042, - "step": 6036 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5074662775667146e-05, - "loss": 0.5486, - "step": 6037 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5072944040224088e-05, - "loss": 0.5852, - "step": 6038 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5071225102970633e-05, - "loss": 0.5677, - "step": 6039 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5069505963975153e-05, - "loss": 0.6797, - "step": 6040 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.506778662330604e-05, - "loss": 0.6754, - "step": 6041 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5066067081031696e-05, - "loss": 0.6475, - "step": 6042 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5064347337220522e-05, - "loss": 0.6197, - "step": 6043 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5062627391940936e-05, - "loss": 0.6738, - "step": 6044 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5060907245261359e-05, - "loss": 0.6933, - "step": 6045 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5059186897250222e-05, - "loss": 0.618, - "step": 6046 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5057466347975964e-05, - "loss": 0.7246, - "step": 6047 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.505574559750703e-05, - "loss": 0.641, - "step": 6048 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5054024645911875e-05, - "loss": 0.5943, - "step": 6049 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5052303493258964e-05, - "loss": 0.6662, - "step": 6050 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5050582139616765e-05, - "loss": 0.6982, - "step": 6051 - }, - { - "epoch": 1.09, - "grad_norm": 0.0, - "learning_rate": 1.5048860585053752e-05, - "loss": 0.6882, - "step": 6052 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5047138829638419e-05, - "loss": 0.6725, - "step": 6053 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5045416873439256e-05, - "loss": 0.6832, - "step": 6054 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5043694716524769e-05, - "loss": 0.6481, - "step": 6055 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5041972358963464e-05, - "loss": 0.7003, - "step": 6056 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5040249800823862e-05, - "loss": 0.6517, - "step": 6057 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5038527042174486e-05, - "loss": 0.5749, - "step": 6058 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5036804083083875e-05, - "loss": 0.6467, - "step": 6059 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5035080923620568e-05, - "loss": 0.6906, - "step": 6060 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5033357563853117e-05, - "loss": 0.7274, - "step": 6061 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.503163400385008e-05, - "loss": 0.5855, - "step": 6062 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.502991024368002e-05, - "loss": 0.7432, - "step": 6063 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5028186283411516e-05, - "loss": 0.6749, - "step": 6064 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5026462123113147e-05, - "loss": 0.6188, - "step": 6065 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5024737762853507e-05, - "loss": 0.6021, - "step": 6066 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5023013202701189e-05, - "loss": 0.6105, - "step": 6067 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.50212884427248e-05, - "loss": 0.7061, - "step": 6068 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5019563482992955e-05, - "loss": 0.5741, - "step": 6069 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5017838323574276e-05, - "loss": 0.6023, - "step": 6070 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5016112964537391e-05, - "loss": 0.558, - "step": 6071 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.501438740595094e-05, - "loss": 0.6864, - "step": 6072 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5012661647883571e-05, - "loss": 0.5606, - "step": 6073 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5010935690403932e-05, - "loss": 0.6005, - "step": 6074 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5009209533580687e-05, - "loss": 0.598, - "step": 6075 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5007483177482505e-05, - "loss": 0.6575, - "step": 6076 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5005756622178064e-05, - "loss": 0.6445, - "step": 6077 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.500402986773605e-05, - "loss": 0.6331, - "step": 6078 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.5002302914225153e-05, - "loss": 0.7758, - "step": 6079 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.500057576171408e-05, - "loss": 0.6386, - "step": 6080 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4998848410271535e-05, - "loss": 0.5921, - "step": 6081 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4997120859966236e-05, - "loss": 0.6353, - "step": 6082 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4995393110866912e-05, - "loss": 0.7259, - "step": 6083 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.499366516304229e-05, - "loss": 0.6376, - "step": 6084 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4991937016561113e-05, - "loss": 0.6386, - "step": 6085 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.499020867149213e-05, - "loss": 0.6495, - "step": 6086 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4988480127904097e-05, - "loss": 0.7976, - "step": 6087 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.498675138586578e-05, - "loss": 0.6847, - "step": 6088 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.498502244544595e-05, - "loss": 0.6338, - "step": 6089 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4983293306713385e-05, - "loss": 0.732, - "step": 6090 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4981563969736879e-05, - "loss": 0.5781, - "step": 6091 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.497983443458522e-05, - "loss": 0.6954, - "step": 6092 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4978104701327221e-05, - "loss": 0.6919, - "step": 6093 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4976374770031688e-05, - "loss": 0.7075, - "step": 6094 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4974644640767439e-05, - "loss": 0.6164, - "step": 6095 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4972914313603307e-05, - "loss": 0.7084, - "step": 6096 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4971183788608123e-05, - "loss": 0.6567, - "step": 6097 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.496945306585073e-05, - "loss": 0.7036, - "step": 6098 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4967722145399986e-05, - "loss": 0.6661, - "step": 6099 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.496599102732474e-05, - "loss": 0.5472, - "step": 6100 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4964259711693865e-05, - "loss": 0.6014, - "step": 6101 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4962528198576233e-05, - "loss": 0.6317, - "step": 6102 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4960796488040731e-05, - "loss": 0.5917, - "step": 6103 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4959064580156246e-05, - "loss": 0.6027, - "step": 6104 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4957332474991676e-05, - "loss": 0.6527, - "step": 6105 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4955600172615927e-05, - "loss": 0.8304, - "step": 6106 - }, - { - "epoch": 1.1, - "grad_norm": 0.0, - "learning_rate": 1.4953867673097917e-05, - "loss": 0.6365, - "step": 6107 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.495213497650656e-05, - "loss": 0.5988, - "step": 6108 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.495040208291079e-05, - "loss": 0.6421, - "step": 6109 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.494866899237955e-05, - "loss": 0.8199, - "step": 6110 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4946935704981775e-05, - "loss": 0.6233, - "step": 6111 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4945202220786425e-05, - "loss": 0.6944, - "step": 6112 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.494346853986246e-05, - "loss": 0.7266, - "step": 6113 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4941734662278847e-05, - "loss": 0.6915, - "step": 6114 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4940000588104561e-05, - "loss": 0.6395, - "step": 6115 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4938266317408591e-05, - "loss": 0.6454, - "step": 6116 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4936531850259927e-05, - "loss": 0.7211, - "step": 6117 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4934797186727573e-05, - "loss": 0.55, - "step": 6118 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4933062326880525e-05, - "loss": 0.6494, - "step": 6119 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4931327270787815e-05, - "loss": 0.6913, - "step": 6120 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4929592018518453e-05, - "loss": 0.7162, - "step": 6121 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4927856570141476e-05, - "loss": 0.6574, - "step": 6122 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4926120925725927e-05, - "loss": 0.6294, - "step": 6123 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4924385085340844e-05, - "loss": 0.7299, - "step": 6124 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4922649049055287e-05, - "loss": 0.683, - "step": 6125 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4920912816938322e-05, - "loss": 0.6579, - "step": 6126 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.491917638905901e-05, - "loss": 0.5986, - "step": 6127 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4917439765486436e-05, - "loss": 0.6936, - "step": 6128 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4915702946289687e-05, - "loss": 0.6449, - "step": 6129 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.491396593153785e-05, - "loss": 0.6683, - "step": 6130 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.491222872130003e-05, - "loss": 0.6767, - "step": 6131 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.491049131564534e-05, - "loss": 0.6545, - "step": 6132 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4908753714642892e-05, - "loss": 0.705, - "step": 6133 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.490701591836181e-05, - "loss": 0.6638, - "step": 6134 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4905277926871228e-05, - "loss": 0.6607, - "step": 6135 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4903539740240289e-05, - "loss": 0.6437, - "step": 6136 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.490180135853814e-05, - "loss": 0.5465, - "step": 6137 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4900062781833932e-05, - "loss": 0.657, - "step": 6138 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4898324010196837e-05, - "loss": 0.681, - "step": 6139 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4896585043696017e-05, - "loss": 0.7342, - "step": 6140 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4894845882400659e-05, - "loss": 0.5915, - "step": 6141 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4893106526379946e-05, - "loss": 0.7186, - "step": 6142 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4891366975703073e-05, - "loss": 0.6011, - "step": 6143 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4889627230439238e-05, - "loss": 0.682, - "step": 6144 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4887887290657661e-05, - "loss": 0.5928, - "step": 6145 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4886147156427551e-05, - "loss": 0.5721, - "step": 6146 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4884406827818136e-05, - "loss": 0.6041, - "step": 6147 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4882666304898655e-05, - "loss": 0.5846, - "step": 6148 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4880925587738339e-05, - "loss": 0.6334, - "step": 6149 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4879184676406442e-05, - "loss": 0.6719, - "step": 6150 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4877443570972223e-05, - "loss": 0.5745, - "step": 6151 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4875702271504942e-05, - "loss": 0.5492, - "step": 6152 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.487396077807387e-05, - "loss": 0.632, - "step": 6153 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.487221909074829e-05, - "loss": 0.6644, - "step": 6154 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.487047720959749e-05, - "loss": 0.6859, - "step": 6155 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4868735134690762e-05, - "loss": 0.6097, - "step": 6156 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4866992866097407e-05, - "loss": 0.6435, - "step": 6157 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4865250403886739e-05, - "loss": 0.5931, - "step": 6158 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4863507748128078e-05, - "loss": 0.5435, - "step": 6159 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4861764898890748e-05, - "loss": 0.7484, - "step": 6160 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4860021856244077e-05, - "loss": 0.5544, - "step": 6161 - }, - { - "epoch": 1.11, - "grad_norm": 0.0, - "learning_rate": 1.4858278620257412e-05, - "loss": 0.7182, - "step": 6162 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.48565351910001e-05, - "loss": 0.5787, - "step": 6163 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4854791568541505e-05, - "loss": 0.6347, - "step": 6164 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4853047752950977e-05, - "loss": 0.7096, - "step": 6165 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.48513037442979e-05, - "loss": 0.6579, - "step": 6166 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4849559542651648e-05, - "loss": 0.6595, - "step": 6167 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.484781514808161e-05, - "loss": 0.7169, - "step": 6168 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4846070560657179e-05, - "loss": 0.7729, - "step": 6169 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4844325780447761e-05, - "loss": 0.7406, - "step": 6170 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4842580807522766e-05, - "loss": 0.6196, - "step": 6171 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4840835641951608e-05, - "loss": 0.6024, - "step": 6172 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4839090283803714e-05, - "loss": 0.6319, - "step": 6173 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4837344733148522e-05, - "loss": 0.6166, - "step": 6174 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.483559899005547e-05, - "loss": 0.6162, - "step": 6175 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4833853054594003e-05, - "loss": 0.6212, - "step": 6176 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4832106926833584e-05, - "loss": 0.5249, - "step": 6177 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4830360606843672e-05, - "loss": 0.6593, - "step": 6178 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.482861409469374e-05, - "loss": 0.6679, - "step": 6179 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.482686739045327e-05, - "loss": 0.6212, - "step": 6180 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.482512049419174e-05, - "loss": 0.6215, - "step": 6181 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4823373405978659e-05, - "loss": 0.6999, - "step": 6182 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4821626125883518e-05, - "loss": 0.6597, - "step": 6183 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4819878653975826e-05, - "loss": 0.5727, - "step": 6184 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4818130990325106e-05, - "loss": 0.6043, - "step": 6185 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4816383135000885e-05, - "loss": 0.6839, - "step": 6186 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4814635088072688e-05, - "loss": 0.6899, - "step": 6187 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4812886849610062e-05, - "loss": 0.5738, - "step": 6188 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4811138419682549e-05, - "loss": 0.6967, - "step": 6189 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.480938979835971e-05, - "loss": 0.6482, - "step": 6190 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4807640985711107e-05, - "loss": 0.7202, - "step": 6191 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4805891981806306e-05, - "loss": 0.7513, - "step": 6192 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4804142786714892e-05, - "loss": 0.7107, - "step": 6193 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4802393400506447e-05, - "loss": 0.6891, - "step": 6194 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4800643823250564e-05, - "loss": 0.645, - "step": 6195 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4798894055016848e-05, - "loss": 0.7143, - "step": 6196 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4797144095874907e-05, - "loss": 0.5888, - "step": 6197 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4795393945894353e-05, - "loss": 0.6467, - "step": 6198 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4793643605144814e-05, - "loss": 0.6427, - "step": 6199 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4791893073695918e-05, - "loss": 0.7581, - "step": 6200 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4790142351617311e-05, - "loss": 0.6149, - "step": 6201 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4788391438978632e-05, - "loss": 0.656, - "step": 6202 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4786640335849539e-05, - "loss": 0.6408, - "step": 6203 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4784889042299693e-05, - "loss": 0.6421, - "step": 6204 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4783137558398766e-05, - "loss": 0.6482, - "step": 6205 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4781385884216433e-05, - "loss": 0.6553, - "step": 6206 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4779634019822378e-05, - "loss": 0.6769, - "step": 6207 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.477788196528629e-05, - "loss": 0.6396, - "step": 6208 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4776129720677878e-05, - "loss": 0.8081, - "step": 6209 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.477437728606684e-05, - "loss": 0.6669, - "step": 6210 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4772624661522897e-05, - "loss": 0.6188, - "step": 6211 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4770871847115767e-05, - "loss": 0.7206, - "step": 6212 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4769118842915183e-05, - "loss": 0.6171, - "step": 6213 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.476736564899088e-05, - "loss": 0.6722, - "step": 6214 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4765612265412608e-05, - "loss": 0.5519, - "step": 6215 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4763858692250115e-05, - "loss": 0.7165, - "step": 6216 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.476210492957316e-05, - "loss": 0.7261, - "step": 6217 - }, - { - "epoch": 1.12, - "grad_norm": 0.0, - "learning_rate": 1.4760350977451517e-05, - "loss": 0.5919, - "step": 6218 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4758596835954953e-05, - "loss": 0.6678, - "step": 6219 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4756842505153257e-05, - "loss": 0.6966, - "step": 6220 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4755087985116216e-05, - "loss": 0.8081, - "step": 6221 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4753333275913629e-05, - "loss": 0.6435, - "step": 6222 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4751578377615304e-05, - "loss": 0.6749, - "step": 6223 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4749823290291048e-05, - "loss": 0.6541, - "step": 6224 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4748068014010685e-05, - "loss": 0.6546, - "step": 6225 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4746312548844044e-05, - "loss": 0.5811, - "step": 6226 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4744556894860958e-05, - "loss": 0.7007, - "step": 6227 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4742801052131273e-05, - "loss": 0.6466, - "step": 6228 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4741045020724836e-05, - "loss": 0.5629, - "step": 6229 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4739288800711502e-05, - "loss": 0.6058, - "step": 6230 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4737532392161145e-05, - "loss": 0.5621, - "step": 6231 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4735775795143635e-05, - "loss": 0.6839, - "step": 6232 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4734019009728848e-05, - "loss": 0.6238, - "step": 6233 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4732262035986675e-05, - "loss": 0.5709, - "step": 6234 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4730504873987013e-05, - "loss": 0.7919, - "step": 6235 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4728747523799762e-05, - "loss": 0.5871, - "step": 6236 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4726989985494834e-05, - "loss": 0.6892, - "step": 6237 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4725232259142148e-05, - "loss": 0.7217, - "step": 6238 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4723474344811628e-05, - "loss": 0.6197, - "step": 6239 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4721716242573204e-05, - "loss": 0.5681, - "step": 6240 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4719957952496822e-05, - "loss": 0.6116, - "step": 6241 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4718199474652427e-05, - "loss": 0.7593, - "step": 6242 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.471644080910997e-05, - "loss": 0.5994, - "step": 6243 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4714681955939422e-05, - "loss": 0.6551, - "step": 6244 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4712922915210747e-05, - "loss": 0.7605, - "step": 6245 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4711163686993925e-05, - "loss": 0.7041, - "step": 6246 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4709404271358944e-05, - "loss": 0.5034, - "step": 6247 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.470764466837579e-05, - "loss": 0.7277, - "step": 6248 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4705884878114468e-05, - "loss": 0.681, - "step": 6249 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4704124900644982e-05, - "loss": 0.8903, - "step": 6250 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4702364736037352e-05, - "loss": 0.6975, - "step": 6251 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4700604384361593e-05, - "loss": 0.7186, - "step": 6252 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4698843845687742e-05, - "loss": 0.7359, - "step": 6253 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4697083120085833e-05, - "loss": 0.7222, - "step": 6254 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.469532220762591e-05, - "loss": 0.717, - "step": 6255 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.469356110837803e-05, - "loss": 0.6322, - "step": 6256 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4691799822412243e-05, - "loss": 0.5626, - "step": 6257 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4690038349798626e-05, - "loss": 0.6354, - "step": 6258 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4688276690607247e-05, - "loss": 0.6126, - "step": 6259 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4686514844908186e-05, - "loss": 0.6066, - "step": 6260 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4684752812771541e-05, - "loss": 0.6408, - "step": 6261 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4682990594267402e-05, - "loss": 0.6612, - "step": 6262 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4681228189465876e-05, - "loss": 0.6201, - "step": 6263 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.467946559843707e-05, - "loss": 0.7057, - "step": 6264 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4677702821251108e-05, - "loss": 0.6119, - "step": 6265 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4675939857978117e-05, - "loss": 0.7178, - "step": 6266 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4674176708688225e-05, - "loss": 0.7953, - "step": 6267 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4672413373451577e-05, - "loss": 0.7135, - "step": 6268 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4670649852338322e-05, - "loss": 0.623, - "step": 6269 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4668886145418612e-05, - "loss": 0.712, - "step": 6270 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4667122252762616e-05, - "loss": 0.7395, - "step": 6271 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4665358174440498e-05, - "loss": 0.5922, - "step": 6272 - }, - { - "epoch": 1.13, - "grad_norm": 0.0, - "learning_rate": 1.4663593910522443e-05, - "loss": 0.6416, - "step": 6273 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4661829461078631e-05, - "loss": 0.6474, - "step": 6274 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4660064826179259e-05, - "loss": 0.6005, - "step": 6275 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.465830000589452e-05, - "loss": 0.6408, - "step": 6276 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4656535000294634e-05, - "loss": 0.6106, - "step": 6277 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4654769809449803e-05, - "loss": 0.6538, - "step": 6278 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4653004433430255e-05, - "loss": 0.6685, - "step": 6279 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4651238872306222e-05, - "loss": 0.6118, - "step": 6280 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4649473126147938e-05, - "loss": 0.6982, - "step": 6281 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.464770719502565e-05, - "loss": 0.5755, - "step": 6282 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4645941079009604e-05, - "loss": 0.6694, - "step": 6283 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4644174778170063e-05, - "loss": 0.656, - "step": 6284 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4642408292577298e-05, - "loss": 0.5856, - "step": 6285 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4640641622301575e-05, - "loss": 0.6247, - "step": 6286 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4638874767413179e-05, - "loss": 0.5913, - "step": 6287 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4637107727982399e-05, - "loss": 0.6301, - "step": 6288 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4635340504079527e-05, - "loss": 0.6073, - "step": 6289 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4633573095774872e-05, - "loss": 0.766, - "step": 6290 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.463180550313874e-05, - "loss": 0.686, - "step": 6291 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4630037726241448e-05, - "loss": 0.7097, - "step": 6292 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4628269765153327e-05, - "loss": 0.6479, - "step": 6293 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4626501619944704e-05, - "loss": 0.6808, - "step": 6294 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4624733290685922e-05, - "loss": 0.7068, - "step": 6295 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4622964777447327e-05, - "loss": 0.7915, - "step": 6296 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4621196080299273e-05, - "loss": 0.6547, - "step": 6297 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4619427199312124e-05, - "loss": 0.7081, - "step": 6298 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4617658134556245e-05, - "loss": 0.5829, - "step": 6299 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4615888886102017e-05, - "loss": 0.5228, - "step": 6300 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4614119454019822e-05, - "loss": 0.6892, - "step": 6301 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4612349838380048e-05, - "loss": 0.6575, - "step": 6302 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4610580039253096e-05, - "loss": 0.6314, - "step": 6303 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4608810056709376e-05, - "loss": 0.6188, - "step": 6304 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4607039890819295e-05, - "loss": 0.6802, - "step": 6305 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4605269541653273e-05, - "loss": 0.6393, - "step": 6306 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4603499009281744e-05, - "loss": 0.6289, - "step": 6307 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.460172829377513e-05, - "loss": 0.6851, - "step": 6308 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4599957395203887e-05, - "loss": 0.6674, - "step": 6309 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4598186313638455e-05, - "loss": 0.5317, - "step": 6310 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.45964150491493e-05, - "loss": 0.7026, - "step": 6311 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4594643601806875e-05, - "loss": 0.7338, - "step": 6312 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.459287197168166e-05, - "loss": 0.5333, - "step": 6313 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4591100158844124e-05, - "loss": 0.645, - "step": 6314 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4589328163364767e-05, - "loss": 0.6141, - "step": 6315 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4587555985314069e-05, - "loss": 0.5865, - "step": 6316 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4585783624762536e-05, - "loss": 0.6027, - "step": 6317 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4584011081780673e-05, - "loss": 0.6867, - "step": 6318 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4582238356438997e-05, - "loss": 0.6138, - "step": 6319 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4580465448808032e-05, - "loss": 0.7472, - "step": 6320 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4578692358958302e-05, - "loss": 0.5903, - "step": 6321 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4576919086960346e-05, - "loss": 0.6014, - "step": 6322 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.457514563288471e-05, - "loss": 0.683, - "step": 6323 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.457337199680194e-05, - "loss": 0.5796, - "step": 6324 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.45715981787826e-05, - "loss": 0.6682, - "step": 6325 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4569824178897254e-05, - "loss": 0.7074, - "step": 6326 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4568049997216469e-05, - "loss": 0.6328, - "step": 6327 - }, - { - "epoch": 1.14, - "grad_norm": 0.0, - "learning_rate": 1.4566275633810834e-05, - "loss": 0.6252, - "step": 6328 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4564501088750928e-05, - "loss": 0.6917, - "step": 6329 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4562726362107354e-05, - "loss": 0.7191, - "step": 6330 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4560951453950707e-05, - "loss": 0.6484, - "step": 6331 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4559176364351597e-05, - "loss": 0.6681, - "step": 6332 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4557401093380639e-05, - "loss": 0.6888, - "step": 6333 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4555625641108462e-05, - "loss": 0.6965, - "step": 6334 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.455385000760569e-05, - "loss": 0.627, - "step": 6335 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4552074192942965e-05, - "loss": 0.5942, - "step": 6336 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4550298197190928e-05, - "loss": 0.6507, - "step": 6337 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4548522020420236e-05, - "loss": 0.7287, - "step": 6338 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4546745662701546e-05, - "loss": 0.6523, - "step": 6339 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4544969124105525e-05, - "loss": 0.6925, - "step": 6340 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4543192404702847e-05, - "loss": 0.627, - "step": 6341 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4541415504564191e-05, - "loss": 0.5768, - "step": 6342 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4539638423760246e-05, - "loss": 0.602, - "step": 6343 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4537861162361706e-05, - "loss": 0.7139, - "step": 6344 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4536083720439279e-05, - "loss": 0.7469, - "step": 6345 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4534306098063666e-05, - "loss": 0.6462, - "step": 6346 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4532528295305593e-05, - "loss": 0.6405, - "step": 6347 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4530750312235777e-05, - "loss": 0.7217, - "step": 6348 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4528972148924956e-05, - "loss": 0.6993, - "step": 6349 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.452719380544386e-05, - "loss": 0.6763, - "step": 6350 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4525415281863241e-05, - "loss": 0.6571, - "step": 6351 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4523636578253848e-05, - "loss": 0.6377, - "step": 6352 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4521857694686445e-05, - "loss": 0.679, - "step": 6353 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4520078631231793e-05, - "loss": 0.7, - "step": 6354 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4518299387960673e-05, - "loss": 0.718, - "step": 6355 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4516519964943863e-05, - "loss": 0.6628, - "step": 6356 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4514740362252149e-05, - "loss": 0.6164, - "step": 6357 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4512960579956332e-05, - "loss": 0.6776, - "step": 6358 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4511180618127209e-05, - "loss": 0.647, - "step": 6359 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4509400476835595e-05, - "loss": 0.6078, - "step": 6360 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4507620156152307e-05, - "loss": 0.6506, - "step": 6361 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4505839656148161e-05, - "loss": 0.5808, - "step": 6362 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4504058976894e-05, - "loss": 0.6927, - "step": 6363 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4502278118460658e-05, - "loss": 0.6677, - "step": 6364 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4500497080918976e-05, - "loss": 0.5763, - "step": 6365 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4498715864339809e-05, - "loss": 0.7003, - "step": 6366 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4496934468794022e-05, - "loss": 0.7577, - "step": 6367 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4495152894352479e-05, - "loss": 0.5828, - "step": 6368 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.449337114108605e-05, - "loss": 0.6781, - "step": 6369 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4491589209065618e-05, - "loss": 0.6397, - "step": 6370 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4489807098362074e-05, - "loss": 0.6177, - "step": 6371 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4488024809046316e-05, - "loss": 0.6993, - "step": 6372 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4486242341189239e-05, - "loss": 0.6991, - "step": 6373 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4484459694861758e-05, - "loss": 0.5014, - "step": 6374 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4482676870134787e-05, - "loss": 0.633, - "step": 6375 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.448089386707925e-05, - "loss": 0.5766, - "step": 6376 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4479110685766083e-05, - "loss": 0.6648, - "step": 6377 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4477327326266215e-05, - "loss": 0.6484, - "step": 6378 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4475543788650598e-05, - "loss": 0.5479, - "step": 6379 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4473760072990182e-05, - "loss": 0.6937, - "step": 6380 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4471976179355924e-05, - "loss": 0.7276, - "step": 6381 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4470192107818797e-05, - "loss": 0.7105, - "step": 6382 - }, - { - "epoch": 1.15, - "grad_norm": 0.0, - "learning_rate": 1.4468407858449768e-05, - "loss": 0.613, - "step": 6383 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4466623431319816e-05, - "loss": 0.6472, - "step": 6384 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4464838826499938e-05, - "loss": 0.5281, - "step": 6385 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.446305404406112e-05, - "loss": 0.6217, - "step": 6386 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4461269084074368e-05, - "loss": 0.6638, - "step": 6387 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4459483946610689e-05, - "loss": 0.5554, - "step": 6388 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4457698631741096e-05, - "loss": 0.676, - "step": 6389 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4455913139536617e-05, - "loss": 0.537, - "step": 6390 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4454127470068283e-05, - "loss": 0.5646, - "step": 6391 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4452341623407123e-05, - "loss": 0.6052, - "step": 6392 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4450555599624187e-05, - "loss": 0.6448, - "step": 6393 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4448769398790525e-05, - "loss": 0.5899, - "step": 6394 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4446983020977197e-05, - "loss": 0.6185, - "step": 6395 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4445196466255265e-05, - "loss": 0.6763, - "step": 6396 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4443409734695803e-05, - "loss": 0.6251, - "step": 6397 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.444162282636989e-05, - "loss": 0.6182, - "step": 6398 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4439835741348612e-05, - "loss": 0.6381, - "step": 6399 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.443804847970306e-05, - "loss": 0.652, - "step": 6400 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4436261041504338e-05, - "loss": 0.6647, - "step": 6401 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4434473426823556e-05, - "loss": 0.644, - "step": 6402 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.443268563573182e-05, - "loss": 0.7081, - "step": 6403 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4430897668300257e-05, - "loss": 0.5887, - "step": 6404 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4429109524599993e-05, - "loss": 0.6837, - "step": 6405 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4427321204702167e-05, - "loss": 0.7848, - "step": 6406 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4425532708677918e-05, - "loss": 0.5158, - "step": 6407 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4423744036598395e-05, - "loss": 0.5934, - "step": 6408 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4421955188534757e-05, - "loss": 0.6587, - "step": 6409 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.442016616455817e-05, - "loss": 0.7072, - "step": 6410 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4418376964739795e-05, - "loss": 0.6598, - "step": 6411 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4416587589150817e-05, - "loss": 0.6393, - "step": 6412 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.441479803786242e-05, - "loss": 0.6514, - "step": 6413 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4413008310945793e-05, - "loss": 0.6304, - "step": 6414 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4411218408472136e-05, - "loss": 0.5675, - "step": 6415 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4409428330512652e-05, - "loss": 0.6761, - "step": 6416 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.440763807713856e-05, - "loss": 0.6876, - "step": 6417 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4405847648421072e-05, - "loss": 0.712, - "step": 6418 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4404057044431414e-05, - "loss": 0.7011, - "step": 6419 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4402266265240825e-05, - "loss": 0.5922, - "step": 6420 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4400475310920544e-05, - "loss": 0.664, - "step": 6421 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4398684181541813e-05, - "loss": 0.6296, - "step": 6422 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4396892877175893e-05, - "loss": 0.7032, - "step": 6423 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.439510139789404e-05, - "loss": 0.6594, - "step": 6424 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4393309743767525e-05, - "loss": 0.6551, - "step": 6425 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4391517914867622e-05, - "loss": 0.5607, - "step": 6426 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4389725911265613e-05, - "loss": 0.6476, - "step": 6427 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4387933733032789e-05, - "loss": 0.6082, - "step": 6428 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4386141380240444e-05, - "loss": 0.7059, - "step": 6429 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4384348852959877e-05, - "loss": 0.7113, - "step": 6430 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4382556151262405e-05, - "loss": 0.7345, - "step": 6431 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4380763275219343e-05, - "loss": 0.6138, - "step": 6432 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4378970224902012e-05, - "loss": 0.6572, - "step": 6433 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4377177000381743e-05, - "loss": 0.7458, - "step": 6434 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4375383601729873e-05, - "loss": 0.5974, - "step": 6435 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4373590029017751e-05, - "loss": 0.6119, - "step": 6436 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4371796282316726e-05, - "loss": 0.6594, - "step": 6437 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.437000236169815e-05, - "loss": 0.742, - "step": 6438 - }, - { - "epoch": 1.16, - "grad_norm": 0.0, - "learning_rate": 1.4368208267233399e-05, - "loss": 0.684, - "step": 6439 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4366413998993839e-05, - "loss": 0.647, - "step": 6440 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.436461955705085e-05, - "loss": 0.5431, - "step": 6441 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4362824941475815e-05, - "loss": 0.6422, - "step": 6442 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4361030152340135e-05, - "loss": 0.6215, - "step": 6443 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.43592351897152e-05, - "loss": 0.7376, - "step": 6444 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4357440053672424e-05, - "loss": 0.662, - "step": 6445 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4355644744283215e-05, - "loss": 0.6042, - "step": 6446 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4353849261619001e-05, - "loss": 0.6553, - "step": 6447 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4352053605751203e-05, - "loss": 0.6693, - "step": 6448 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4350257776751255e-05, - "loss": 0.583, - "step": 6449 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4348461774690602e-05, - "loss": 0.6023, - "step": 6450 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.434666559964069e-05, - "loss": 0.6059, - "step": 6451 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4344869251672974e-05, - "loss": 0.6348, - "step": 6452 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4343072730858918e-05, - "loss": 0.6339, - "step": 6453 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4341276037269984e-05, - "loss": 0.704, - "step": 6454 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4339479170977657e-05, - "loss": 0.7057, - "step": 6455 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4337682132053415e-05, - "loss": 0.6004, - "step": 6456 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4335884920568743e-05, - "loss": 0.597, - "step": 6457 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4334087536595143e-05, - "loss": 0.6176, - "step": 6458 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4332289980204114e-05, - "loss": 0.6595, - "step": 6459 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4330492251467171e-05, - "loss": 0.6172, - "step": 6460 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4328694350455827e-05, - "loss": 0.6699, - "step": 6461 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4326896277241604e-05, - "loss": 0.6715, - "step": 6462 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.432509803189604e-05, - "loss": 0.6731, - "step": 6463 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4323299614490663e-05, - "loss": 0.6507, - "step": 6464 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.432150102509702e-05, - "loss": 0.7095, - "step": 6465 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4319702263786668e-05, - "loss": 0.7494, - "step": 6466 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4317903330631156e-05, - "loss": 0.6333, - "step": 6467 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4316104225702052e-05, - "loss": 0.6126, - "step": 6468 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.431430494907093e-05, - "loss": 0.7247, - "step": 6469 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4312505500809364e-05, - "loss": 0.6917, - "step": 6470 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4310705880988942e-05, - "loss": 0.6642, - "step": 6471 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4308906089681257e-05, - "loss": 0.646, - "step": 6472 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4307106126957906e-05, - "loss": 0.6779, - "step": 6473 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4305305992890495e-05, - "loss": 0.6436, - "step": 6474 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4303505687550636e-05, - "loss": 0.6641, - "step": 6475 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4301705211009947e-05, - "loss": 0.6405, - "step": 6476 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4299904563340054e-05, - "loss": 0.5102, - "step": 6477 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4298103744612598e-05, - "loss": 0.5725, - "step": 6478 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4296302754899205e-05, - "loss": 0.5743, - "step": 6479 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4294501594271532e-05, - "loss": 0.6347, - "step": 6480 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4292700262801226e-05, - "loss": 0.6786, - "step": 6481 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4290898760559952e-05, - "loss": 0.6094, - "step": 6482 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4289097087619377e-05, - "loss": 0.6768, - "step": 6483 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4287295244051167e-05, - "loss": 0.5717, - "step": 6484 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4285493229927012e-05, - "loss": 0.6689, - "step": 6485 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4283691045318591e-05, - "loss": 0.6466, - "step": 6486 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4281888690297605e-05, - "loss": 0.6889, - "step": 6487 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4280086164935749e-05, - "loss": 0.6557, - "step": 6488 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4278283469304737e-05, - "loss": 0.677, - "step": 6489 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4276480603476275e-05, - "loss": 0.6361, - "step": 6490 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4274677567522091e-05, - "loss": 0.5552, - "step": 6491 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4272874361513908e-05, - "loss": 0.57, - "step": 6492 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4271070985523467e-05, - "loss": 0.6456, - "step": 6493 - }, - { - "epoch": 1.17, - "grad_norm": 0.0, - "learning_rate": 1.4269267439622506e-05, - "loss": 0.5963, - "step": 6494 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4267463723882768e-05, - "loss": 0.6381, - "step": 6495 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4265659838376014e-05, - "loss": 0.5505, - "step": 6496 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4263855783174006e-05, - "loss": 0.5511, - "step": 6497 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.426205155834851e-05, - "loss": 0.6119, - "step": 6498 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.42602471639713e-05, - "loss": 0.6594, - "step": 6499 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4258442600114161e-05, - "loss": 0.6845, - "step": 6500 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4256637866848881e-05, - "loss": 0.7143, - "step": 6501 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4254832964247252e-05, - "loss": 0.6003, - "step": 6502 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.425302789238108e-05, - "loss": 0.5862, - "step": 6503 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4251222651322173e-05, - "loss": 0.5637, - "step": 6504 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4249417241142344e-05, - "loss": 0.6716, - "step": 6505 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4247611661913421e-05, - "loss": 0.6975, - "step": 6506 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4245805913707229e-05, - "loss": 0.6334, - "step": 6507 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4243999996595603e-05, - "loss": 0.6277, - "step": 6508 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4242193910650388e-05, - "loss": 0.7619, - "step": 6509 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.424038765594343e-05, - "loss": 0.5884, - "step": 6510 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4238581232546586e-05, - "loss": 0.6631, - "step": 6511 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4236774640531725e-05, - "loss": 0.5586, - "step": 6512 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4234967879970708e-05, - "loss": 0.6337, - "step": 6513 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.423316095093541e-05, - "loss": 0.6261, - "step": 6514 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.423135385349772e-05, - "loss": 0.7291, - "step": 6515 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4229546587729529e-05, - "loss": 0.6932, - "step": 6516 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4227739153702725e-05, - "loss": 0.6726, - "step": 6517 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4225931551489213e-05, - "loss": 0.7189, - "step": 6518 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.422412378116091e-05, - "loss": 0.6605, - "step": 6519 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4222315842789724e-05, - "loss": 0.747, - "step": 6520 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.422050773644758e-05, - "loss": 0.5454, - "step": 6521 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4218699462206407e-05, - "loss": 0.685, - "step": 6522 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4216891020138145e-05, - "loss": 0.6316, - "step": 6523 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4215082410314733e-05, - "loss": 0.4863, - "step": 6524 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.421327363280812e-05, - "loss": 0.5785, - "step": 6525 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4211464687690269e-05, - "loss": 0.5919, - "step": 6526 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4209655575033135e-05, - "loss": 0.6594, - "step": 6527 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4207846294908692e-05, - "loss": 0.6353, - "step": 6528 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4206036847388914e-05, - "loss": 0.5968, - "step": 6529 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4204227232545781e-05, - "loss": 0.6866, - "step": 6530 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4202417450451294e-05, - "loss": 0.6839, - "step": 6531 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4200607501177435e-05, - "loss": 0.6985, - "step": 6532 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4198797384796216e-05, - "loss": 0.7543, - "step": 6533 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4196987101379645e-05, - "loss": 0.6987, - "step": 6534 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4195176650999738e-05, - "loss": 0.568, - "step": 6535 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4193366033728516e-05, - "loss": 0.5598, - "step": 6536 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.419155524963801e-05, - "loss": 0.6615, - "step": 6537 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4189744298800255e-05, - "loss": 0.7104, - "step": 6538 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4187933181287294e-05, - "loss": 0.6054, - "step": 6539 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4186121897171177e-05, - "loss": 0.5965, - "step": 6540 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4184310446523958e-05, - "loss": 0.7364, - "step": 6541 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4182498829417706e-05, - "loss": 0.619, - "step": 6542 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4180687045924479e-05, - "loss": 0.7116, - "step": 6543 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4178875096116365e-05, - "loss": 0.6269, - "step": 6544 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.417706298006544e-05, - "loss": 0.6221, - "step": 6545 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4175250697843792e-05, - "loss": 0.7019, - "step": 6546 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.417343824952352e-05, - "loss": 0.618, - "step": 6547 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4171625635176724e-05, - "loss": 0.6086, - "step": 6548 - }, - { - "epoch": 1.18, - "grad_norm": 0.0, - "learning_rate": 1.4169812854875512e-05, - "loss": 0.5595, - "step": 6549 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4167999908692007e-05, - "loss": 0.5602, - "step": 6550 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4166186796698321e-05, - "loss": 0.6432, - "step": 6551 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4164373518966588e-05, - "loss": 0.66, - "step": 6552 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4162560075568945e-05, - "loss": 0.572, - "step": 6553 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4160746466577529e-05, - "loss": 0.632, - "step": 6554 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4158932692064489e-05, - "loss": 0.694, - "step": 6555 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4157118752101983e-05, - "loss": 0.6227, - "step": 6556 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4155304646762174e-05, - "loss": 0.6174, - "step": 6557 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4153490376117227e-05, - "loss": 0.6275, - "step": 6558 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4151675940239316e-05, - "loss": 0.5503, - "step": 6559 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4149861339200622e-05, - "loss": 0.6278, - "step": 6560 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4148046573073339e-05, - "loss": 0.7161, - "step": 6561 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4146231641929653e-05, - "loss": 0.6715, - "step": 6562 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4144416545841772e-05, - "loss": 0.7395, - "step": 6563 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4142601284881904e-05, - "loss": 0.6352, - "step": 6564 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4140785859122255e-05, - "loss": 0.5677, - "step": 6565 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4138970268635055e-05, - "loss": 0.6738, - "step": 6566 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4137154513492526e-05, - "loss": 0.73, - "step": 6567 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.41353385937669e-05, - "loss": 0.707, - "step": 6568 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4133522509530424e-05, - "loss": 0.6861, - "step": 6569 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.413170626085534e-05, - "loss": 0.6216, - "step": 6570 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4129889847813903e-05, - "loss": 0.6479, - "step": 6571 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4128073270478375e-05, - "loss": 0.6415, - "step": 6572 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4126256528921018e-05, - "loss": 0.5859, - "step": 6573 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4124439623214108e-05, - "loss": 0.6336, - "step": 6574 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4122622553429926e-05, - "loss": 0.6793, - "step": 6575 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4120805319640755e-05, - "loss": 0.5642, - "step": 6576 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4118987921918889e-05, - "loss": 0.5867, - "step": 6577 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4117170360336625e-05, - "loss": 0.67, - "step": 6578 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4115352634966274e-05, - "loss": 0.6057, - "step": 6579 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4113534745880147e-05, - "loss": 0.6825, - "step": 6580 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4111716693150557e-05, - "loss": 0.5595, - "step": 6581 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4109898476849835e-05, - "loss": 0.6308, - "step": 6582 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.410808009705031e-05, - "loss": 0.6525, - "step": 6583 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4106261553824323e-05, - "loss": 0.6126, - "step": 6584 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4104442847244216e-05, - "loss": 0.5734, - "step": 6585 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4102623977382342e-05, - "loss": 0.6267, - "step": 6586 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4100804944311052e-05, - "loss": 0.6973, - "step": 6587 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4098985748102725e-05, - "loss": 0.6788, - "step": 6588 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4097166388829719e-05, - "loss": 0.6258, - "step": 6589 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4095346866564415e-05, - "loss": 0.6121, - "step": 6590 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.40935271813792e-05, - "loss": 0.7569, - "step": 6591 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4091707333346455e-05, - "loss": 0.6322, - "step": 6592 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.408988732253859e-05, - "loss": 0.6507, - "step": 6593 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4088067149027994e-05, - "loss": 0.6801, - "step": 6594 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4086246812887087e-05, - "loss": 0.6624, - "step": 6595 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4084426314188284e-05, - "loss": 0.7303, - "step": 6596 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4082605653004e-05, - "loss": 0.6063, - "step": 6597 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4080784829406673e-05, - "loss": 0.5779, - "step": 6598 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4078963843468737e-05, - "loss": 0.623, - "step": 6599 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4077142695262625e-05, - "loss": 0.6242, - "step": 6600 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4075321384860793e-05, - "loss": 0.6304, - "step": 6601 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.40734999123357e-05, - "loss": 0.6433, - "step": 6602 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4071678277759799e-05, - "loss": 0.6516, - "step": 6603 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4069856481205562e-05, - "loss": 0.6333, - "step": 6604 - }, - { - "epoch": 1.19, - "grad_norm": 0.0, - "learning_rate": 1.4068034522745461e-05, - "loss": 0.7045, - "step": 6605 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.406621240245198e-05, - "loss": 0.636, - "step": 6606 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4064390120397602e-05, - "loss": 0.8876, - "step": 6607 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4062567676654819e-05, - "loss": 0.5954, - "step": 6608 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4060745071296138e-05, - "loss": 0.632, - "step": 6609 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4058922304394065e-05, - "loss": 0.5598, - "step": 6610 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4057099376021104e-05, - "loss": 0.6834, - "step": 6611 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4055276286249782e-05, - "loss": 0.6541, - "step": 6612 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.405345303515262e-05, - "loss": 0.6798, - "step": 6613 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4051629622802154e-05, - "loss": 0.6573, - "step": 6614 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4049806049270923e-05, - "loss": 0.7017, - "step": 6615 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4047982314631465e-05, - "loss": 0.534, - "step": 6616 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4046158418956337e-05, - "loss": 0.6035, - "step": 6617 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.40443343623181e-05, - "loss": 0.7292, - "step": 6618 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.404251014478931e-05, - "loss": 0.653, - "step": 6619 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4040685766442541e-05, - "loss": 0.5964, - "step": 6620 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.403886122735037e-05, - "loss": 0.6593, - "step": 6621 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.403703652758538e-05, - "loss": 0.5327, - "step": 6622 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4035211667220166e-05, - "loss": 0.68, - "step": 6623 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4033386646327313e-05, - "loss": 0.5987, - "step": 6624 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4031561464979436e-05, - "loss": 0.5981, - "step": 6625 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4029736123249134e-05, - "loss": 0.5664, - "step": 6626 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4027910621209023e-05, - "loss": 0.6171, - "step": 6627 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.402608495893173e-05, - "loss": 0.6518, - "step": 6628 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.402425913648988e-05, - "loss": 0.6625, - "step": 6629 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4022433153956107e-05, - "loss": 0.6563, - "step": 6630 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4020607011403056e-05, - "loss": 0.6075, - "step": 6631 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4018780708903365e-05, - "loss": 0.582, - "step": 6632 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4016954246529697e-05, - "loss": 0.6148, - "step": 6633 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4015127624354705e-05, - "loss": 0.6613, - "step": 6634 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4013300842451058e-05, - "loss": 0.7064, - "step": 6635 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4011473900891428e-05, - "loss": 0.7518, - "step": 6636 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.40096467997485e-05, - "loss": 0.6816, - "step": 6637 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4007819539094945e-05, - "loss": 0.671, - "step": 6638 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4005992119003468e-05, - "loss": 0.7248, - "step": 6639 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.400416453954676e-05, - "loss": 0.5853, - "step": 6640 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4002336800797527e-05, - "loss": 0.6671, - "step": 6641 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.4000508902828482e-05, - "loss": 0.6458, - "step": 6642 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3998680845712335e-05, - "loss": 0.6391, - "step": 6643 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.399685262952182e-05, - "loss": 0.6414, - "step": 6644 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3995024254329657e-05, - "loss": 0.6787, - "step": 6645 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3993195720208583e-05, - "loss": 0.6276, - "step": 6646 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3991367027231345e-05, - "loss": 0.7203, - "step": 6647 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.398953817547069e-05, - "loss": 0.549, - "step": 6648 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.398770916499937e-05, - "loss": 0.7273, - "step": 6649 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.398587999589015e-05, - "loss": 0.5811, - "step": 6650 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3984050668215793e-05, - "loss": 0.6939, - "step": 6651 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3982221182049078e-05, - "loss": 0.6287, - "step": 6652 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3980391537462783e-05, - "loss": 0.7173, - "step": 6653 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.397856173452969e-05, - "loss": 0.7582, - "step": 6654 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3976731773322598e-05, - "loss": 0.5897, - "step": 6655 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3974901653914306e-05, - "loss": 0.6038, - "step": 6656 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3973071376377612e-05, - "loss": 0.646, - "step": 6657 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.3971240940785336e-05, - "loss": 0.5888, - "step": 6658 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.396941034721029e-05, - "loss": 0.6695, - "step": 6659 - }, - { - "epoch": 1.2, - "grad_norm": 0.0, - "learning_rate": 1.39675795957253e-05, - "loss": 0.7209, - "step": 6660 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.39657486864032e-05, - "loss": 0.6158, - "step": 6661 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.396391761931682e-05, - "loss": 0.6367, - "step": 6662 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3962086394539009e-05, - "loss": 0.7414, - "step": 6663 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3960255012142613e-05, - "loss": 0.7565, - "step": 6664 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3958423472200488e-05, - "loss": 0.6119, - "step": 6665 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3956591774785496e-05, - "loss": 0.6163, - "step": 6666 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3954759919970506e-05, - "loss": 0.5096, - "step": 6667 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.395292790782839e-05, - "loss": 0.6613, - "step": 6668 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3951095738432031e-05, - "loss": 0.8105, - "step": 6669 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3949263411854315e-05, - "loss": 0.6775, - "step": 6670 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3947430928168138e-05, - "loss": 0.572, - "step": 6671 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3945598287446394e-05, - "loss": 0.6864, - "step": 6672 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3943765489761986e-05, - "loss": 0.6698, - "step": 6673 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3941932535187836e-05, - "loss": 0.5768, - "step": 6674 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.394009942379686e-05, - "loss": 0.6426, - "step": 6675 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3938266155661973e-05, - "loss": 0.634, - "step": 6676 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3936432730856112e-05, - "loss": 0.5881, - "step": 6677 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3934599149452217e-05, - "loss": 0.6705, - "step": 6678 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3932765411523226e-05, - "loss": 0.6153, - "step": 6679 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.393093151714209e-05, - "loss": 0.6304, - "step": 6680 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3929097466381762e-05, - "loss": 0.5201, - "step": 6681 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3927263259315212e-05, - "loss": 0.5946, - "step": 6682 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3925428896015396e-05, - "loss": 0.6632, - "step": 6683 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3923594376555294e-05, - "loss": 0.5471, - "step": 6684 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3921759701007886e-05, - "loss": 0.6324, - "step": 6685 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3919924869446162e-05, - "loss": 0.6119, - "step": 6686 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3918089881943108e-05, - "loss": 0.6589, - "step": 6687 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.391625473857173e-05, - "loss": 0.6705, - "step": 6688 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3914419439405027e-05, - "loss": 0.6065, - "step": 6689 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3912583984516015e-05, - "loss": 0.6331, - "step": 6690 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.391074837397771e-05, - "loss": 0.7629, - "step": 6691 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.390891260786313e-05, - "loss": 0.6965, - "step": 6692 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3907076686245313e-05, - "loss": 0.6255, - "step": 6693 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3905240609197295e-05, - "loss": 0.7231, - "step": 6694 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.390340437679211e-05, - "loss": 0.5405, - "step": 6695 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3901567989102818e-05, - "loss": 0.6484, - "step": 6696 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3899731446202465e-05, - "loss": 0.6527, - "step": 6697 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3897894748164114e-05, - "loss": 0.5934, - "step": 6698 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3896057895060833e-05, - "loss": 0.737, - "step": 6699 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3894220886965692e-05, - "loss": 0.6811, - "step": 6700 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3892383723951777e-05, - "loss": 0.6212, - "step": 6701 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3890546406092168e-05, - "loss": 0.6043, - "step": 6702 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3888708933459957e-05, - "loss": 0.7026, - "step": 6703 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3886871306128244e-05, - "loss": 0.6903, - "step": 6704 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3885033524170131e-05, - "loss": 0.6302, - "step": 6705 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.388319558765873e-05, - "loss": 0.6604, - "step": 6706 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3881357496667157e-05, - "loss": 0.6634, - "step": 6707 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3879519251268531e-05, - "loss": 0.6588, - "step": 6708 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3877680851535986e-05, - "loss": 0.552, - "step": 6709 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3875842297542654e-05, - "loss": 0.6024, - "step": 6710 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.387400358936167e-05, - "loss": 0.737, - "step": 6711 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3872164727066193e-05, - "loss": 0.55, - "step": 6712 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3870325710729366e-05, - "loss": 0.5776, - "step": 6713 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3868486540424355e-05, - "loss": 0.6709, - "step": 6714 - }, - { - "epoch": 1.21, - "grad_norm": 0.0, - "learning_rate": 1.3866647216224322e-05, - "loss": 0.7029, - "step": 6715 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3864807738202436e-05, - "loss": 0.5576, - "step": 6716 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3862968106431882e-05, - "loss": 0.6205, - "step": 6717 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3861128320985837e-05, - "loss": 0.5514, - "step": 6718 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.385928838193749e-05, - "loss": 0.6278, - "step": 6719 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3857448289360045e-05, - "loss": 0.6011, - "step": 6720 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3855608043326696e-05, - "loss": 0.5753, - "step": 6721 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3853767643910651e-05, - "loss": 0.6177, - "step": 6722 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.385192709118513e-05, - "loss": 0.6652, - "step": 6723 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.385008638522335e-05, - "loss": 0.6122, - "step": 6724 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.384824552609854e-05, - "loss": 0.6536, - "step": 6725 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3846404513883931e-05, - "loss": 0.6176, - "step": 6726 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3844563348652757e-05, - "loss": 0.5415, - "step": 6727 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3842722030478272e-05, - "loss": 0.6581, - "step": 6728 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.384088055943372e-05, - "loss": 0.6131, - "step": 6729 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3839038935592358e-05, - "loss": 0.5548, - "step": 6730 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3837197159027453e-05, - "loss": 0.6633, - "step": 6731 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3835355229812266e-05, - "loss": 0.6355, - "step": 6732 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3833513148020083e-05, - "loss": 0.6406, - "step": 6733 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3831670913724177e-05, - "loss": 0.7292, - "step": 6734 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3829828526997841e-05, - "loss": 0.7024, - "step": 6735 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3827985987914363e-05, - "loss": 0.6025, - "step": 6736 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3826143296547045e-05, - "loss": 0.5821, - "step": 6737 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.382430045296919e-05, - "loss": 0.7198, - "step": 6738 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3822457457254119e-05, - "loss": 0.5377, - "step": 6739 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3820614309475135e-05, - "loss": 0.6566, - "step": 6740 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3818771009705572e-05, - "loss": 0.5774, - "step": 6741 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3816927558018753e-05, - "loss": 0.6044, - "step": 6742 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.381508395448802e-05, - "loss": 0.5894, - "step": 6743 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.381324019918671e-05, - "loss": 0.5902, - "step": 6744 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3811396292188174e-05, - "loss": 0.5841, - "step": 6745 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3809552233565762e-05, - "loss": 0.6136, - "step": 6746 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3807708023392841e-05, - "loss": 0.6109, - "step": 6747 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3805863661742767e-05, - "loss": 0.6056, - "step": 6748 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3804019148688916e-05, - "loss": 0.6475, - "step": 6749 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3802174484304671e-05, - "loss": 0.6726, - "step": 6750 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3800329668663408e-05, - "loss": 0.6659, - "step": 6751 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3798484701838522e-05, - "loss": 0.653, - "step": 6752 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3796639583903408e-05, - "loss": 0.6453, - "step": 6753 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3794794314931465e-05, - "loss": 0.6185, - "step": 6754 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3792948894996106e-05, - "loss": 0.5896, - "step": 6755 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.379110332417074e-05, - "loss": 0.6003, - "step": 6756 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3789257602528789e-05, - "loss": 0.6178, - "step": 6757 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3787411730143683e-05, - "loss": 0.6391, - "step": 6758 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3785565707088845e-05, - "loss": 0.5526, - "step": 6759 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.378371953343772e-05, - "loss": 0.5536, - "step": 6760 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3781873209263754e-05, - "loss": 0.5839, - "step": 6761 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3780026734640387e-05, - "loss": 0.7687, - "step": 6762 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3778180109641086e-05, - "loss": 0.6186, - "step": 6763 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3776333334339308e-05, - "loss": 0.6388, - "step": 6764 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3774486408808514e-05, - "loss": 0.6234, - "step": 6765 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3772639333122192e-05, - "loss": 0.7011, - "step": 6766 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3770792107353811e-05, - "loss": 0.7169, - "step": 6767 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.376894473157686e-05, - "loss": 0.5843, - "step": 6768 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3767097205864835e-05, - "loss": 0.5637, - "step": 6769 - }, - { - "epoch": 1.22, - "grad_norm": 0.0, - "learning_rate": 1.3765249530291223e-05, - "loss": 0.648, - "step": 6770 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3763401704929537e-05, - "loss": 0.7191, - "step": 6771 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3761553729853288e-05, - "loss": 0.5992, - "step": 6772 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3759705605135984e-05, - "loss": 0.6387, - "step": 6773 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.375785733085115e-05, - "loss": 0.636, - "step": 6774 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3756008907072315e-05, - "loss": 0.8323, - "step": 6775 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.375416033387301e-05, - "loss": 0.7336, - "step": 6776 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3752311611326779e-05, - "loss": 0.632, - "step": 6777 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.375046273950716e-05, - "loss": 0.6036, - "step": 6778 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3748613718487711e-05, - "loss": 0.6022, - "step": 6779 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3746764548341986e-05, - "loss": 0.5808, - "step": 6780 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.374491522914355e-05, - "loss": 0.7479, - "step": 6781 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3743065760965968e-05, - "loss": 0.6951, - "step": 6782 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3741216143882819e-05, - "loss": 0.6825, - "step": 6783 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3739366377967685e-05, - "loss": 0.5962, - "step": 6784 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3737516463294148e-05, - "loss": 0.7275, - "step": 6785 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3735666399935801e-05, - "loss": 0.545, - "step": 6786 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.373381618796625e-05, - "loss": 0.5479, - "step": 6787 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3731965827459092e-05, - "loss": 0.6368, - "step": 6788 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.373011531848794e-05, - "loss": 0.6701, - "step": 6789 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3728264661126412e-05, - "loss": 0.5922, - "step": 6790 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3726413855448128e-05, - "loss": 0.7496, - "step": 6791 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3724562901526717e-05, - "loss": 0.6698, - "step": 6792 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3722711799435815e-05, - "loss": 0.713, - "step": 6793 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3720860549249058e-05, - "loss": 0.5697, - "step": 6794 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3719009151040093e-05, - "loss": 0.5832, - "step": 6795 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3717157604882576e-05, - "loss": 0.6167, - "step": 6796 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.371530591085016e-05, - "loss": 0.66, - "step": 6797 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.371345406901651e-05, - "loss": 0.6128, - "step": 6798 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3711602079455297e-05, - "loss": 0.6213, - "step": 6799 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3709749942240195e-05, - "loss": 0.7035, - "step": 6800 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3707897657444885e-05, - "loss": 0.5957, - "step": 6801 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3706045225143053e-05, - "loss": 0.7486, - "step": 6802 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3704192645408394e-05, - "loss": 0.7315, - "step": 6803 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3702339918314606e-05, - "loss": 0.6554, - "step": 6804 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.370048704393539e-05, - "loss": 0.6178, - "step": 6805 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3698634022344466e-05, - "loss": 0.6378, - "step": 6806 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3696780853615543e-05, - "loss": 0.6742, - "step": 6807 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3694927537822342e-05, - "loss": 0.724, - "step": 6808 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3693074075038598e-05, - "loss": 0.6795, - "step": 6809 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3691220465338036e-05, - "loss": 0.5641, - "step": 6810 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3689366708794402e-05, - "loss": 0.7558, - "step": 6811 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3687512805481443e-05, - "loss": 0.5975, - "step": 6812 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3685658755472904e-05, - "loss": 0.6217, - "step": 6813 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3683804558842545e-05, - "loss": 0.6735, - "step": 6814 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3681950215664135e-05, - "loss": 0.5533, - "step": 6815 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3680095726011434e-05, - "loss": 0.6836, - "step": 6816 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.367824108995822e-05, - "loss": 0.737, - "step": 6817 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3676386307578278e-05, - "loss": 0.6115, - "step": 6818 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3674531378945389e-05, - "loss": 0.5943, - "step": 6819 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3672676304133346e-05, - "loss": 0.6655, - "step": 6820 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3670821083215948e-05, - "loss": 0.5846, - "step": 6821 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3668965716267e-05, - "loss": 0.5981, - "step": 6822 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3667110203360309e-05, - "loss": 0.6796, - "step": 6823 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3665254544569691e-05, - "loss": 0.6493, - "step": 6824 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3663398739968972e-05, - "loss": 0.6152, - "step": 6825 - }, - { - "epoch": 1.23, - "grad_norm": 0.0, - "learning_rate": 1.3661542789631973e-05, - "loss": 0.6698, - "step": 6826 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.365968669363253e-05, - "loss": 0.661, - "step": 6827 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3657830452044481e-05, - "loss": 0.5773, - "step": 6828 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3655974064941669e-05, - "loss": 0.6182, - "step": 6829 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3654117532397946e-05, - "loss": 0.6511, - "step": 6830 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.365226085448717e-05, - "loss": 0.6508, - "step": 6831 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3650404031283198e-05, - "loss": 0.5413, - "step": 6832 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.36485470628599e-05, - "loss": 0.6696, - "step": 6833 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3646689949291151e-05, - "loss": 0.7194, - "step": 6834 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3644832690650829e-05, - "loss": 0.6207, - "step": 6835 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3642975287012816e-05, - "loss": 0.6702, - "step": 6836 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3641117738451008e-05, - "loss": 0.658, - "step": 6837 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.36392600450393e-05, - "loss": 0.6312, - "step": 6838 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3637402206851591e-05, - "loss": 0.596, - "step": 6839 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3635544223961792e-05, - "loss": 0.5735, - "step": 6840 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3633686096443813e-05, - "loss": 0.5883, - "step": 6841 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.363182782437158e-05, - "loss": 0.5617, - "step": 6842 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3629969407819008e-05, - "loss": 0.5807, - "step": 6843 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.362811084686004e-05, - "loss": 0.6714, - "step": 6844 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3626252141568608e-05, - "loss": 0.7094, - "step": 6845 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3624393292018651e-05, - "loss": 0.7238, - "step": 6846 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.362253429828412e-05, - "loss": 0.6009, - "step": 6847 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3620675160438972e-05, - "loss": 0.6553, - "step": 6848 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.361881587855716e-05, - "loss": 0.6592, - "step": 6849 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3616956452712656e-05, - "loss": 0.6229, - "step": 6850 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3615096882979423e-05, - "loss": 0.6328, - "step": 6851 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3613237169431446e-05, - "loss": 0.6473, - "step": 6852 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3611377312142709e-05, - "loss": 0.713, - "step": 6853 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3609517311187188e-05, - "loss": 0.5787, - "step": 6854 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.360765716663889e-05, - "loss": 0.5878, - "step": 6855 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.360579687857181e-05, - "loss": 0.5887, - "step": 6856 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3603936447059951e-05, - "loss": 0.619, - "step": 6857 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3602075872177327e-05, - "loss": 0.5812, - "step": 6858 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3600215153997954e-05, - "loss": 0.6613, - "step": 6859 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3598354292595852e-05, - "loss": 0.6292, - "step": 6860 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3596493288045057e-05, - "loss": 0.6847, - "step": 6861 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3594632140419592e-05, - "loss": 0.7426, - "step": 6862 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3592770849793507e-05, - "loss": 0.6239, - "step": 6863 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3590909416240842e-05, - "loss": 0.6865, - "step": 6864 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3589047839835647e-05, - "loss": 0.5957, - "step": 6865 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3587186120651982e-05, - "loss": 0.7452, - "step": 6866 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.358532425876391e-05, - "loss": 0.6431, - "step": 6867 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3583462254245495e-05, - "loss": 0.6933, - "step": 6868 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3581600107170814e-05, - "loss": 0.6359, - "step": 6869 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.357973781761394e-05, - "loss": 0.5247, - "step": 6870 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3577875385648969e-05, - "loss": 0.6302, - "step": 6871 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3576012811349983e-05, - "loss": 0.6179, - "step": 6872 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3574150094791084e-05, - "loss": 0.6984, - "step": 6873 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.357228723604637e-05, - "loss": 0.7687, - "step": 6874 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3570424235189947e-05, - "loss": 0.6908, - "step": 6875 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3568561092295936e-05, - "loss": 0.8234, - "step": 6876 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.356669780743845e-05, - "loss": 0.72, - "step": 6877 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3564834380691612e-05, - "loss": 0.7068, - "step": 6878 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3562970812129559e-05, - "loss": 0.6517, - "step": 6879 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3561107101826424e-05, - "loss": 0.7594, - "step": 6880 - }, - { - "epoch": 1.24, - "grad_norm": 0.0, - "learning_rate": 1.3559243249856344e-05, - "loss": 0.6676, - "step": 6881 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3557379256293473e-05, - "loss": 0.6465, - "step": 6882 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.355551512121196e-05, - "loss": 0.665, - "step": 6883 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3553650844685967e-05, - "loss": 0.551, - "step": 6884 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3551786426789652e-05, - "loss": 0.7382, - "step": 6885 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.354992186759719e-05, - "loss": 0.6509, - "step": 6886 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3548057167182758e-05, - "loss": 0.7701, - "step": 6887 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3546192325620531e-05, - "loss": 0.7184, - "step": 6888 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3544327342984695e-05, - "loss": 0.5045, - "step": 6889 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.354246221934945e-05, - "loss": 0.5866, - "step": 6890 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3540596954788988e-05, - "loss": 0.5938, - "step": 6891 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3538731549377512e-05, - "loss": 0.6154, - "step": 6892 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3536866003189234e-05, - "loss": 0.7272, - "step": 6893 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.353500031629837e-05, - "loss": 0.6363, - "step": 6894 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3533134488779136e-05, - "loss": 0.5556, - "step": 6895 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.353126852070576e-05, - "loss": 0.618, - "step": 6896 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3529402412152468e-05, - "loss": 0.6675, - "step": 6897 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3527536163193509e-05, - "loss": 0.6096, - "step": 6898 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3525669773903114e-05, - "loss": 0.6581, - "step": 6899 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3523803244355535e-05, - "loss": 0.5411, - "step": 6900 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3521936574625029e-05, - "loss": 0.7294, - "step": 6901 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3520069764785853e-05, - "loss": 0.5293, - "step": 6902 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.351820281491227e-05, - "loss": 0.6005, - "step": 6903 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3516335725078552e-05, - "loss": 0.7646, - "step": 6904 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3514468495358974e-05, - "loss": 0.7172, - "step": 6905 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3512601125827823e-05, - "loss": 0.6421, - "step": 6906 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3510733616559376e-05, - "loss": 0.7466, - "step": 6907 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3508865967627933e-05, - "loss": 0.6035, - "step": 6908 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3506998179107796e-05, - "loss": 0.5631, - "step": 6909 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.350513025107326e-05, - "loss": 0.7543, - "step": 6910 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3503262183598636e-05, - "loss": 0.6285, - "step": 6911 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3501393976758242e-05, - "loss": 0.6307, - "step": 6912 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3499525630626397e-05, - "loss": 0.646, - "step": 6913 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.349765714527743e-05, - "loss": 0.5872, - "step": 6914 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3495788520785666e-05, - "loss": 0.6316, - "step": 6915 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3493919757225449e-05, - "loss": 0.596, - "step": 6916 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.349205085467112e-05, - "loss": 0.6101, - "step": 6917 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3490181813197023e-05, - "loss": 0.5263, - "step": 6918 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3488312632877514e-05, - "loss": 0.6568, - "step": 6919 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3486443313786955e-05, - "loss": 0.579, - "step": 6920 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3484573855999705e-05, - "loss": 0.6134, - "step": 6921 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.348270425959014e-05, - "loss": 0.7137, - "step": 6922 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3480834524632634e-05, - "loss": 0.6869, - "step": 6923 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3478964651201567e-05, - "loss": 0.6453, - "step": 6924 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3477094639371326e-05, - "loss": 0.5845, - "step": 6925 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3475224489216303e-05, - "loss": 0.73, - "step": 6926 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3473354200810898e-05, - "loss": 0.7306, - "step": 6927 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3471483774229512e-05, - "loss": 0.6501, - "step": 6928 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3469613209546555e-05, - "loss": 0.5753, - "step": 6929 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3467742506836438e-05, - "loss": 0.6127, - "step": 6930 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3465871666173586e-05, - "loss": 0.6083, - "step": 6931 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3464000687632421e-05, - "loss": 0.6986, - "step": 6932 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3462129571287377e-05, - "loss": 0.5761, - "step": 6933 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3460258317212884e-05, - "loss": 0.6052, - "step": 6934 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3458386925483389e-05, - "loss": 0.6282, - "step": 6935 - }, - { - "epoch": 1.25, - "grad_norm": 0.0, - "learning_rate": 1.3456515396173337e-05, - "loss": 0.6189, - "step": 6936 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3454643729357185e-05, - "loss": 0.7534, - "step": 6937 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.345277192510938e-05, - "loss": 0.602, - "step": 6938 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3450899983504398e-05, - "loss": 0.7112, - "step": 6939 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3449027904616703e-05, - "loss": 0.7701, - "step": 6940 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3447155688520768e-05, - "loss": 0.5961, - "step": 6941 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3445283335291075e-05, - "loss": 0.6209, - "step": 6942 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.344341084500211e-05, - "loss": 0.6229, - "step": 6943 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3441538217728361e-05, - "loss": 0.643, - "step": 6944 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3439665453544324e-05, - "loss": 0.5547, - "step": 6945 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3437792552524504e-05, - "loss": 0.7184, - "step": 6946 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3435919514743411e-05, - "loss": 0.5386, - "step": 6947 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3434046340275549e-05, - "loss": 0.6134, - "step": 6948 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3432173029195443e-05, - "loss": 0.5873, - "step": 6949 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3430299581577612e-05, - "loss": 0.5839, - "step": 6950 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.342842599749659e-05, - "loss": 0.6533, - "step": 6951 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3426552277026908e-05, - "loss": 0.6058, - "step": 6952 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3424678420243105e-05, - "loss": 0.6763, - "step": 6953 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3422804427219726e-05, - "loss": 0.5955, - "step": 6954 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.342093029803133e-05, - "loss": 0.7034, - "step": 6955 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3419056032752461e-05, - "loss": 0.6129, - "step": 6956 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3417181631457686e-05, - "loss": 0.6731, - "step": 6957 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3415307094221573e-05, - "loss": 0.747, - "step": 6958 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3413432421118693e-05, - "loss": 0.7016, - "step": 6959 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3411557612223625e-05, - "loss": 0.6148, - "step": 6960 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3409682667610948e-05, - "loss": 0.6397, - "step": 6961 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3407807587355254e-05, - "loss": 0.7349, - "step": 6962 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3405932371531138e-05, - "loss": 0.5848, - "step": 6963 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.34040570202132e-05, - "loss": 0.58, - "step": 6964 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3402181533476036e-05, - "loss": 0.6346, - "step": 6965 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3400305911394266e-05, - "loss": 0.6955, - "step": 6966 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3398430154042502e-05, - "loss": 0.693, - "step": 6967 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3396554261495363e-05, - "loss": 0.6234, - "step": 6968 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3394678233827476e-05, - "loss": 0.6597, - "step": 6969 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3392802071113475e-05, - "loss": 0.7426, - "step": 6970 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3390925773427996e-05, - "loss": 0.537, - "step": 6971 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3389049340845681e-05, - "loss": 0.7028, - "step": 6972 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3387172773441174e-05, - "loss": 0.5445, - "step": 6973 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3385296071289138e-05, - "loss": 0.6934, - "step": 6974 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.338341923446422e-05, - "loss": 0.6422, - "step": 6975 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3381542263041088e-05, - "loss": 0.6485, - "step": 6976 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3379665157094414e-05, - "loss": 0.5837, - "step": 6977 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.337778791669887e-05, - "loss": 0.5865, - "step": 6978 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3375910541929136e-05, - "loss": 0.6544, - "step": 6979 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3374033032859899e-05, - "loss": 0.6447, - "step": 6980 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3372155389565845e-05, - "loss": 0.5722, - "step": 6981 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3370277612121677e-05, - "loss": 0.6776, - "step": 6982 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3368399700602089e-05, - "loss": 0.6776, - "step": 6983 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3366521655081787e-05, - "loss": 0.6718, - "step": 6984 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3364643475635495e-05, - "loss": 0.7426, - "step": 6985 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3362765162337914e-05, - "loss": 0.7386, - "step": 6986 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3360886715263778e-05, - "loss": 0.68, - "step": 6987 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.335900813448781e-05, - "loss": 0.576, - "step": 6988 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3357129420084744e-05, - "loss": 0.6029, - "step": 6989 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.335525057212932e-05, - "loss": 0.6268, - "step": 6990 - }, - { - "epoch": 1.26, - "grad_norm": 0.0, - "learning_rate": 1.3353371590696278e-05, - "loss": 0.5692, - "step": 6991 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3351492475860368e-05, - "loss": 0.6478, - "step": 6992 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3349613227696352e-05, - "loss": 0.6925, - "step": 6993 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3347733846278979e-05, - "loss": 0.6464, - "step": 6994 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.334585433168302e-05, - "loss": 0.7079, - "step": 6995 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3343974683983244e-05, - "loss": 0.702, - "step": 6996 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3342094903254423e-05, - "loss": 0.6947, - "step": 6997 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3340214989571345e-05, - "loss": 0.5965, - "step": 6998 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3338334943008792e-05, - "loss": 0.6424, - "step": 6999 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3336454763641557e-05, - "loss": 0.69, - "step": 7000 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3334574451544435e-05, - "loss": 0.668, - "step": 7001 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3332694006792229e-05, - "loss": 0.7141, - "step": 7002 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3330813429459743e-05, - "loss": 0.6927, - "step": 7003 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3328932719621799e-05, - "loss": 0.6834, - "step": 7004 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3327051877353206e-05, - "loss": 0.614, - "step": 7005 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3325170902728788e-05, - "loss": 0.6202, - "step": 7006 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3323289795823375e-05, - "loss": 0.7759, - "step": 7007 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3321408556711803e-05, - "loss": 0.7064, - "step": 7008 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3319527185468908e-05, - "loss": 0.6218, - "step": 7009 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3317645682169535e-05, - "loss": 0.6496, - "step": 7010 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3315764046888532e-05, - "loss": 0.539, - "step": 7011 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3313882279700759e-05, - "loss": 0.6996, - "step": 7012 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3312000380681068e-05, - "loss": 0.606, - "step": 7013 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3310118349904329e-05, - "loss": 0.6941, - "step": 7014 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3308236187445414e-05, - "loss": 0.552, - "step": 7015 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3306353893379192e-05, - "loss": 0.5917, - "step": 7016 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3304471467780549e-05, - "loss": 0.7199, - "step": 7017 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3302588910724369e-05, - "loss": 0.6038, - "step": 7018 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.330070622228555e-05, - "loss": 0.5445, - "step": 7019 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3298823402538978e-05, - "loss": 0.6539, - "step": 7020 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3296940451559562e-05, - "loss": 0.6763, - "step": 7021 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3295057369422203e-05, - "loss": 0.6341, - "step": 7022 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3293174156201824e-05, - "loss": 0.7668, - "step": 7023 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3291290811973329e-05, - "loss": 0.6146, - "step": 7024 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3289407336811651e-05, - "loss": 0.6231, - "step": 7025 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3287523730791713e-05, - "loss": 0.751, - "step": 7026 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3285639993988448e-05, - "loss": 0.6767, - "step": 7027 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3283756126476797e-05, - "loss": 0.5495, - "step": 7028 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3281872128331703e-05, - "loss": 0.6956, - "step": 7029 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.327998799962811e-05, - "loss": 0.5347, - "step": 7030 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.327810374044098e-05, - "loss": 0.7695, - "step": 7031 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3276219350845264e-05, - "loss": 0.636, - "step": 7032 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3274334830915932e-05, - "loss": 0.6213, - "step": 7033 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3272450180727949e-05, - "loss": 0.8553, - "step": 7034 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3270565400356293e-05, - "loss": 0.6251, - "step": 7035 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3268680489875943e-05, - "loss": 0.5871, - "step": 7036 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3266795449361883e-05, - "loss": 0.6429, - "step": 7037 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3264910278889103e-05, - "loss": 0.653, - "step": 7038 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3263024978532599e-05, - "loss": 0.6353, - "step": 7039 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3261139548367372e-05, - "loss": 0.6482, - "step": 7040 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3259253988468425e-05, - "loss": 0.5953, - "step": 7041 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3257368298910778e-05, - "loss": 0.6156, - "step": 7042 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3255482479769432e-05, - "loss": 0.6424, - "step": 7043 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.325359653111942e-05, - "loss": 0.6396, - "step": 7044 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3251710453035763e-05, - "loss": 0.6486, - "step": 7045 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.3249824245593495e-05, - "loss": 0.6877, - "step": 7046 - }, - { - "epoch": 1.27, - "grad_norm": 0.0, - "learning_rate": 1.324793790886765e-05, - "loss": 0.6758, - "step": 7047 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3246051442933272e-05, - "loss": 0.6384, - "step": 7048 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3244164847865405e-05, - "loss": 0.6044, - "step": 7049 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3242278123739105e-05, - "loss": 0.6527, - "step": 7050 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3240391270629425e-05, - "loss": 0.5793, - "step": 7051 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.323850428861143e-05, - "loss": 0.7138, - "step": 7052 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3236617177760187e-05, - "loss": 0.6405, - "step": 7053 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3234729938150765e-05, - "loss": 0.6414, - "step": 7054 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3232842569858249e-05, - "loss": 0.6075, - "step": 7055 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3230955072957714e-05, - "loss": 0.6718, - "step": 7056 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3229067447524254e-05, - "loss": 0.645, - "step": 7057 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3227179693632958e-05, - "loss": 0.6414, - "step": 7058 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3225291811358925e-05, - "loss": 0.6664, - "step": 7059 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3223403800777257e-05, - "loss": 0.797, - "step": 7060 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3221515661963068e-05, - "loss": 0.6993, - "step": 7061 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3219627394991464e-05, - "loss": 0.5761, - "step": 7062 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3217738999937567e-05, - "loss": 0.8289, - "step": 7063 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3215850476876502e-05, - "loss": 0.7297, - "step": 7064 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3213961825883394e-05, - "loss": 0.6018, - "step": 7065 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3212073047033381e-05, - "loss": 0.591, - "step": 7066 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3210184140401597e-05, - "loss": 0.6666, - "step": 7067 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3208295106063189e-05, - "loss": 0.5733, - "step": 7068 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3206405944093307e-05, - "loss": 0.6856, - "step": 7069 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3204516654567101e-05, - "loss": 0.655, - "step": 7070 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3202627237559735e-05, - "loss": 0.5975, - "step": 7071 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3200737693146375e-05, - "loss": 0.6841, - "step": 7072 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3198848021402179e-05, - "loss": 0.6407, - "step": 7073 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3196958222402332e-05, - "loss": 0.6035, - "step": 7074 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3195068296222012e-05, - "loss": 0.6677, - "step": 7075 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3193178242936399e-05, - "loss": 0.5868, - "step": 7076 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3191288062620689e-05, - "loss": 0.6006, - "step": 7077 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3189397755350067e-05, - "loss": 0.6443, - "step": 7078 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3187507321199742e-05, - "loss": 0.6414, - "step": 7079 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3185616760244916e-05, - "loss": 0.7221, - "step": 7080 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3183726072560796e-05, - "loss": 0.637, - "step": 7081 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3181835258222599e-05, - "loss": 0.6733, - "step": 7082 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3179944317305546e-05, - "loss": 0.6872, - "step": 7083 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3178053249884862e-05, - "loss": 0.6735, - "step": 7084 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3176162056035773e-05, - "loss": 0.6378, - "step": 7085 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3174270735833515e-05, - "loss": 0.6399, - "step": 7086 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3172379289353331e-05, - "loss": 0.6152, - "step": 7087 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3170487716670467e-05, - "loss": 0.6121, - "step": 7088 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3168596017860164e-05, - "loss": 0.6457, - "step": 7089 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3166704192997688e-05, - "loss": 0.6669, - "step": 7090 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3164812242158294e-05, - "loss": 0.6444, - "step": 7091 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3162920165417246e-05, - "loss": 0.5852, - "step": 7092 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3161027962849816e-05, - "loss": 0.6614, - "step": 7093 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.315913563453128e-05, - "loss": 0.6461, - "step": 7094 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3157243180536913e-05, - "loss": 0.6363, - "step": 7095 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3155350600942008e-05, - "loss": 0.6217, - "step": 7096 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3153457895821846e-05, - "loss": 0.7775, - "step": 7097 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.315156506525173e-05, - "loss": 0.6273, - "step": 7098 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3149672109306956e-05, - "loss": 0.6241, - "step": 7099 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.314777902806283e-05, - "loss": 0.6633, - "step": 7100 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3145885821594662e-05, - "loss": 0.6401, - "step": 7101 - }, - { - "epoch": 1.28, - "grad_norm": 0.0, - "learning_rate": 1.3143992489977767e-05, - "loss": 0.676, - "step": 7102 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3142099033287463e-05, - "loss": 0.685, - "step": 7103 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3140205451599077e-05, - "loss": 0.6201, - "step": 7104 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3138311744987936e-05, - "loss": 0.6904, - "step": 7105 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3136417913529384e-05, - "loss": 0.6626, - "step": 7106 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3134523957298755e-05, - "loss": 0.6124, - "step": 7107 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3132629876371388e-05, - "loss": 0.5571, - "step": 7108 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3130735670822639e-05, - "loss": 0.6897, - "step": 7109 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3128841340727862e-05, - "loss": 0.6184, - "step": 7110 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3126946886162417e-05, - "loss": 0.7882, - "step": 7111 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3125052307201668e-05, - "loss": 0.6339, - "step": 7112 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3123157603920987e-05, - "loss": 0.6719, - "step": 7113 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3121262776395743e-05, - "loss": 0.6173, - "step": 7114 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3119367824701322e-05, - "loss": 0.6491, - "step": 7115 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3117472748913099e-05, - "loss": 0.6511, - "step": 7116 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3115577549106475e-05, - "loss": 0.5868, - "step": 7117 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3113682225356838e-05, - "loss": 0.6554, - "step": 7118 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3111786777739586e-05, - "loss": 0.5839, - "step": 7119 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3109891206330128e-05, - "loss": 0.6498, - "step": 7120 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3107995511203867e-05, - "loss": 0.6722, - "step": 7121 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3106099692436223e-05, - "loss": 0.6305, - "step": 7122 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3104203750102614e-05, - "loss": 0.6643, - "step": 7123 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3102307684278454e-05, - "loss": 0.655, - "step": 7124 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.310041149503919e-05, - "loss": 0.6373, - "step": 7125 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3098515182460239e-05, - "loss": 0.7589, - "step": 7126 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3096618746617048e-05, - "loss": 0.5948, - "step": 7127 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3094722187585055e-05, - "loss": 0.6535, - "step": 7128 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3092825505439718e-05, - "loss": 0.7284, - "step": 7129 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.309092870025648e-05, - "loss": 0.5985, - "step": 7130 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3089031772110802e-05, - "loss": 0.8353, - "step": 7131 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.308713472107815e-05, - "loss": 0.6081, - "step": 7132 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3085237547233993e-05, - "loss": 0.5778, - "step": 7133 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.30833402506538e-05, - "loss": 0.6529, - "step": 7134 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3081442831413044e-05, - "loss": 0.5399, - "step": 7135 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.307954528958722e-05, - "loss": 0.6243, - "step": 7136 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3077647625251809e-05, - "loss": 0.6306, - "step": 7137 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3075749838482299e-05, - "loss": 0.6131, - "step": 7138 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3073851929354193e-05, - "loss": 0.6695, - "step": 7139 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3071953897942993e-05, - "loss": 0.7744, - "step": 7140 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3070055744324201e-05, - "loss": 0.6828, - "step": 7141 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3068157468573336e-05, - "loss": 0.6727, - "step": 7142 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3066259070765908e-05, - "loss": 0.6341, - "step": 7143 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3064360550977445e-05, - "loss": 0.6748, - "step": 7144 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3062461909283466e-05, - "loss": 0.617, - "step": 7145 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3060563145759507e-05, - "loss": 0.7514, - "step": 7146 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3058664260481102e-05, - "loss": 0.6189, - "step": 7147 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3056765253523796e-05, - "loss": 0.6513, - "step": 7148 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3054866124963128e-05, - "loss": 0.6395, - "step": 7149 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3052966874874655e-05, - "loss": 0.5924, - "step": 7150 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3051067503333927e-05, - "loss": 0.6252, - "step": 7151 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.304916801041651e-05, - "loss": 0.596, - "step": 7152 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3047268396197966e-05, - "loss": 0.6765, - "step": 7153 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3045368660753861e-05, - "loss": 0.6601, - "step": 7154 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3043468804159777e-05, - "loss": 0.6797, - "step": 7155 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3041568826491292e-05, - "loss": 0.5754, - "step": 7156 - }, - { - "epoch": 1.29, - "grad_norm": 0.0, - "learning_rate": 1.3039668727823984e-05, - "loss": 0.5593, - "step": 7157 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3037768508233451e-05, - "loss": 0.6998, - "step": 7158 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.303586816779528e-05, - "loss": 0.5733, - "step": 7159 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3033967706585073e-05, - "loss": 0.6229, - "step": 7160 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3032067124678433e-05, - "loss": 0.6568, - "step": 7161 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3030166422150966e-05, - "loss": 0.6236, - "step": 7162 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3028265599078295e-05, - "loss": 0.5901, - "step": 7163 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3026364655536026e-05, - "loss": 0.6477, - "step": 7164 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3024463591599785e-05, - "loss": 0.5945, - "step": 7165 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3022562407345204e-05, - "loss": 0.6353, - "step": 7166 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3020661102847912e-05, - "loss": 0.6052, - "step": 7167 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3018759678183547e-05, - "loss": 0.6783, - "step": 7168 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.301685813342775e-05, - "loss": 0.6807, - "step": 7169 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3014956468656165e-05, - "loss": 0.6399, - "step": 7170 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3013054683944453e-05, - "loss": 0.59, - "step": 7171 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3011152779368261e-05, - "loss": 0.7225, - "step": 7172 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.300925075500325e-05, - "loss": 0.5991, - "step": 7173 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3007348610925096e-05, - "loss": 0.5646, - "step": 7174 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3005446347209456e-05, - "loss": 0.6598, - "step": 7175 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3003543963932015e-05, - "loss": 0.6197, - "step": 7176 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.3001641461168448e-05, - "loss": 0.6607, - "step": 7177 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2999738838994445e-05, - "loss": 0.5889, - "step": 7178 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2997836097485687e-05, - "loss": 0.5751, - "step": 7179 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2995933236717878e-05, - "loss": 0.6261, - "step": 7180 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2994030256766708e-05, - "loss": 0.6649, - "step": 7181 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.299212715770789e-05, - "loss": 0.5932, - "step": 7182 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2990223939617126e-05, - "loss": 0.6911, - "step": 7183 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2988320602570128e-05, - "loss": 0.6533, - "step": 7184 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2986417146642621e-05, - "loss": 0.6705, - "step": 7185 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2984513571910322e-05, - "loss": 0.6234, - "step": 7186 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2982609878448962e-05, - "loss": 0.6346, - "step": 7187 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2980706066334273e-05, - "loss": 0.5986, - "step": 7188 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2978802135641987e-05, - "loss": 0.694, - "step": 7189 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2976898086447851e-05, - "loss": 0.6497, - "step": 7190 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2974993918827607e-05, - "loss": 0.6053, - "step": 7191 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.297308963285701e-05, - "loss": 0.6317, - "step": 7192 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2971185228611815e-05, - "loss": 0.5463, - "step": 7193 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.296928070616778e-05, - "loss": 0.6251, - "step": 7194 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2967376065600674e-05, - "loss": 0.5201, - "step": 7195 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2965471306986264e-05, - "loss": 0.6886, - "step": 7196 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2963566430400323e-05, - "loss": 0.5587, - "step": 7197 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2961661435918635e-05, - "loss": 0.676, - "step": 7198 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2959756323616981e-05, - "loss": 0.5947, - "step": 7199 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.295785109357115e-05, - "loss": 0.6611, - "step": 7200 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2955945745856937e-05, - "loss": 0.6611, - "step": 7201 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2954040280550136e-05, - "loss": 0.6275, - "step": 7202 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.295213469772655e-05, - "loss": 0.6316, - "step": 7203 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2950228997461994e-05, - "loss": 0.6525, - "step": 7204 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2948323179832271e-05, - "loss": 0.7483, - "step": 7205 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2946417244913204e-05, - "loss": 0.6793, - "step": 7206 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2944511192780609e-05, - "loss": 0.6776, - "step": 7207 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.294260502351031e-05, - "loss": 0.6888, - "step": 7208 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2940698737178152e-05, - "loss": 0.7233, - "step": 7209 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2938792333859956e-05, - "loss": 0.6151, - "step": 7210 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2936885813631564e-05, - "loss": 0.566, - "step": 7211 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2934979176568827e-05, - "loss": 0.5589, - "step": 7212 - }, - { - "epoch": 1.3, - "grad_norm": 0.0, - "learning_rate": 1.2933072422747588e-05, - "loss": 0.6494, - "step": 7213 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2931165552243704e-05, - "loss": 0.5663, - "step": 7214 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2929258565133034e-05, - "loss": 0.6784, - "step": 7215 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.292735146149144e-05, - "loss": 0.5567, - "step": 7216 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2925444241394788e-05, - "loss": 0.6553, - "step": 7217 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2923536904918955e-05, - "loss": 0.6691, - "step": 7218 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2921629452139812e-05, - "loss": 0.6294, - "step": 7219 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.291972188313325e-05, - "loss": 0.6308, - "step": 7220 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2917814197975144e-05, - "loss": 0.6223, - "step": 7221 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2915906396741394e-05, - "loss": 0.6424, - "step": 7222 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2913998479507892e-05, - "loss": 0.6201, - "step": 7223 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2912090446350538e-05, - "loss": 0.5744, - "step": 7224 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2910182297345237e-05, - "loss": 0.6855, - "step": 7225 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.29082740325679e-05, - "loss": 0.5867, - "step": 7226 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2906365652094438e-05, - "loss": 0.6852, - "step": 7227 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2904457156000776e-05, - "loss": 0.7311, - "step": 7228 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2902548544362827e-05, - "loss": 0.6636, - "step": 7229 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2900639817256525e-05, - "loss": 0.6587, - "step": 7230 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2898730974757809e-05, - "loss": 0.6429, - "step": 7231 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.28968220169426e-05, - "loss": 0.6853, - "step": 7232 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2894912943886855e-05, - "loss": 0.5837, - "step": 7233 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2893003755666512e-05, - "loss": 0.6329, - "step": 7234 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2891094452357521e-05, - "loss": 0.6341, - "step": 7235 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2889185034035843e-05, - "loss": 0.538, - "step": 7236 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2887275500777435e-05, - "loss": 0.6668, - "step": 7237 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2885365852658259e-05, - "loss": 0.6785, - "step": 7238 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2883456089754289e-05, - "loss": 0.7177, - "step": 7239 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2881546212141493e-05, - "loss": 0.5874, - "step": 7240 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2879636219895855e-05, - "loss": 0.6343, - "step": 7241 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2877726113093354e-05, - "loss": 0.6716, - "step": 7242 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2875815891809978e-05, - "loss": 0.6512, - "step": 7243 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2873905556121722e-05, - "loss": 0.6137, - "step": 7244 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2871995106104578e-05, - "loss": 0.5666, - "step": 7245 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2870084541834547e-05, - "loss": 0.7161, - "step": 7246 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2868173863387638e-05, - "loss": 0.5884, - "step": 7247 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2866263070839857e-05, - "loss": 0.7311, - "step": 7248 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2864352164267224e-05, - "loss": 0.6878, - "step": 7249 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2862441143745756e-05, - "loss": 0.6568, - "step": 7250 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2860530009351471e-05, - "loss": 0.7054, - "step": 7251 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2858618761160403e-05, - "loss": 0.665, - "step": 7252 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2856707399248588e-05, - "loss": 0.6318, - "step": 7253 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2854795923692054e-05, - "loss": 0.5817, - "step": 7254 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2852884334566848e-05, - "loss": 0.6199, - "step": 7255 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.285097263194902e-05, - "loss": 0.6206, - "step": 7256 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2849060815914611e-05, - "loss": 0.69, - "step": 7257 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2847148886539686e-05, - "loss": 0.6771, - "step": 7258 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.28452368439003e-05, - "loss": 0.6691, - "step": 7259 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2843324688072519e-05, - "loss": 0.6342, - "step": 7260 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2841412419132409e-05, - "loss": 0.7525, - "step": 7261 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2839500037156047e-05, - "loss": 0.7578, - "step": 7262 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2837587542219507e-05, - "loss": 0.5545, - "step": 7263 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2835674934398877e-05, - "loss": 0.5335, - "step": 7264 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2833762213770238e-05, - "loss": 0.633, - "step": 7265 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2831849380409685e-05, - "loss": 0.7734, - "step": 7266 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.282993643439331e-05, - "loss": 0.6794, - "step": 7267 - }, - { - "epoch": 1.31, - "grad_norm": 0.0, - "learning_rate": 1.2828023375797217e-05, - "loss": 0.6083, - "step": 7268 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.282611020469751e-05, - "loss": 0.6475, - "step": 7269 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2824196921170296e-05, - "loss": 0.6312, - "step": 7270 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2822283525291691e-05, - "loss": 0.6506, - "step": 7271 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2820370017137816e-05, - "loss": 0.7084, - "step": 7272 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2818456396784787e-05, - "loss": 0.5745, - "step": 7273 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2816542664308733e-05, - "loss": 0.6804, - "step": 7274 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.281462881978579e-05, - "loss": 0.518, - "step": 7275 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2812714863292085e-05, - "loss": 0.5372, - "step": 7276 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2810800794903773e-05, - "loss": 0.6572, - "step": 7277 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2808886614696982e-05, - "loss": 0.5782, - "step": 7278 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2806972322747876e-05, - "loss": 0.5953, - "step": 7279 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2805057919132601e-05, - "loss": 0.6711, - "step": 7280 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2803143403927314e-05, - "loss": 0.6738, - "step": 7281 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2801228777208181e-05, - "loss": 0.6774, - "step": 7282 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.279931403905137e-05, - "loss": 0.6021, - "step": 7283 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2797399189533049e-05, - "loss": 0.7503, - "step": 7284 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2795484228729398e-05, - "loss": 0.6276, - "step": 7285 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2793569156716592e-05, - "loss": 0.7294, - "step": 7286 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2791653973570823e-05, - "loss": 0.7161, - "step": 7287 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2789738679368277e-05, - "loss": 0.6019, - "step": 7288 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2787823274185145e-05, - "loss": 0.7314, - "step": 7289 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2785907758097627e-05, - "loss": 0.6817, - "step": 7290 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2783992131181929e-05, - "loss": 0.7223, - "step": 7291 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2782076393514252e-05, - "loss": 0.6061, - "step": 7292 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2780160545170812e-05, - "loss": 0.6435, - "step": 7293 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.277824458622782e-05, - "loss": 0.7023, - "step": 7294 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2776328516761498e-05, - "loss": 0.5975, - "step": 7295 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2774412336848076e-05, - "loss": 0.6654, - "step": 7296 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2772496046563775e-05, - "loss": 0.6176, - "step": 7297 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2770579645984836e-05, - "loss": 0.6675, - "step": 7298 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2768663135187489e-05, - "loss": 0.6537, - "step": 7299 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.276674651424798e-05, - "loss": 0.6612, - "step": 7300 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2764829783242557e-05, - "loss": 0.6227, - "step": 7301 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2762912942247466e-05, - "loss": 0.6242, - "step": 7302 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2760995991338967e-05, - "loss": 0.6433, - "step": 7303 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2759078930593316e-05, - "loss": 0.6825, - "step": 7304 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2757161760086776e-05, - "loss": 0.7323, - "step": 7305 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2755244479895623e-05, - "loss": 0.6806, - "step": 7306 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2753327090096124e-05, - "loss": 0.7265, - "step": 7307 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2751409590764555e-05, - "loss": 0.6922, - "step": 7308 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2749491981977197e-05, - "loss": 0.6335, - "step": 7309 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2747574263810341e-05, - "loss": 0.5883, - "step": 7310 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2745656436340275e-05, - "loss": 0.6606, - "step": 7311 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.274373849964329e-05, - "loss": 0.6246, - "step": 7312 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2741820453795686e-05, - "loss": 0.7334, - "step": 7313 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2739902298873768e-05, - "loss": 0.702, - "step": 7314 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2737984034953845e-05, - "loss": 0.6798, - "step": 7315 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2736065662112225e-05, - "loss": 0.5653, - "step": 7316 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2734147180425227e-05, - "loss": 0.6874, - "step": 7317 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.273222858996917e-05, - "loss": 0.6537, - "step": 7318 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2730309890820379e-05, - "loss": 0.6477, - "step": 7319 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2728391083055182e-05, - "loss": 0.6262, - "step": 7320 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2726472166749914e-05, - "loss": 0.5829, - "step": 7321 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2724553141980916e-05, - "loss": 0.6171, - "step": 7322 - }, - { - "epoch": 1.32, - "grad_norm": 0.0, - "learning_rate": 1.2722634008824524e-05, - "loss": 0.5814, - "step": 7323 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2720714767357083e-05, - "loss": 0.6018, - "step": 7324 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2718795417654952e-05, - "loss": 0.6073, - "step": 7325 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2716875959794486e-05, - "loss": 0.5991, - "step": 7326 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2714956393852032e-05, - "loss": 0.6426, - "step": 7327 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2713036719903965e-05, - "loss": 0.6724, - "step": 7328 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.271111693802665e-05, - "loss": 0.5532, - "step": 7329 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.270919704829646e-05, - "loss": 0.7287, - "step": 7330 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2707277050789767e-05, - "loss": 0.6607, - "step": 7331 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2705356945582956e-05, - "loss": 0.5978, - "step": 7332 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2703436732752415e-05, - "loss": 0.7213, - "step": 7333 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2701516412374527e-05, - "loss": 0.5995, - "step": 7334 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2699595984525686e-05, - "loss": 0.6464, - "step": 7335 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2697675449282291e-05, - "loss": 0.6435, - "step": 7336 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.269575480672075e-05, - "loss": 0.5868, - "step": 7337 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2693834056917463e-05, - "loss": 0.629, - "step": 7338 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2691913199948843e-05, - "loss": 0.6904, - "step": 7339 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.26899922358913e-05, - "loss": 0.6221, - "step": 7340 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2688071164821265e-05, - "loss": 0.5695, - "step": 7341 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2686149986815148e-05, - "loss": 0.6662, - "step": 7342 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2684228701949386e-05, - "loss": 0.5805, - "step": 7343 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2682307310300406e-05, - "loss": 0.6631, - "step": 7344 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2680385811944648e-05, - "loss": 0.697, - "step": 7345 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2678464206958549e-05, - "loss": 0.6845, - "step": 7346 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2676542495418557e-05, - "loss": 0.5755, - "step": 7347 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.267462067740112e-05, - "loss": 0.6919, - "step": 7348 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2672698752982689e-05, - "loss": 0.6058, - "step": 7349 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2670776722239724e-05, - "loss": 0.5763, - "step": 7350 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2668854585248683e-05, - "loss": 0.6978, - "step": 7351 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2666932342086038e-05, - "loss": 0.5954, - "step": 7352 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2665009992828254e-05, - "loss": 0.6389, - "step": 7353 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2663087537551806e-05, - "loss": 0.7041, - "step": 7354 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2661164976333175e-05, - "loss": 0.6123, - "step": 7355 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2659242309248843e-05, - "loss": 0.6468, - "step": 7356 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2657319536375296e-05, - "loss": 0.5362, - "step": 7357 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2655396657789026e-05, - "loss": 0.648, - "step": 7358 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2653473673566525e-05, - "loss": 0.5795, - "step": 7359 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.26515505837843e-05, - "loss": 0.5716, - "step": 7360 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2649627388518848e-05, - "loss": 0.6671, - "step": 7361 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.264770408784668e-05, - "loss": 0.6674, - "step": 7362 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2645780681844306e-05, - "loss": 0.574, - "step": 7363 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2643857170588245e-05, - "loss": 0.6397, - "step": 7364 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2641933554155018e-05, - "loss": 0.7645, - "step": 7365 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2640009832621148e-05, - "loss": 0.6464, - "step": 7366 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2638086006063164e-05, - "loss": 0.5111, - "step": 7367 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2636162074557599e-05, - "loss": 0.6731, - "step": 7368 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.263423803818099e-05, - "loss": 0.6343, - "step": 7369 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2632313897009878e-05, - "loss": 0.6897, - "step": 7370 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2630389651120813e-05, - "loss": 0.6857, - "step": 7371 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2628465300590343e-05, - "loss": 0.6538, - "step": 7372 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2626540845495015e-05, - "loss": 0.6319, - "step": 7373 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2624616285911395e-05, - "loss": 0.5568, - "step": 7374 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2622691621916043e-05, - "loss": 0.6533, - "step": 7375 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2620766853585526e-05, - "loss": 0.7449, - "step": 7376 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2618841980996414e-05, - "loss": 0.6612, - "step": 7377 - }, - { - "epoch": 1.33, - "grad_norm": 0.0, - "learning_rate": 1.2616917004225279e-05, - "loss": 0.5749, - "step": 7378 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2614991923348709e-05, - "loss": 0.6424, - "step": 7379 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2613066738443276e-05, - "loss": 0.6415, - "step": 7380 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2611141449585568e-05, - "loss": 0.5461, - "step": 7381 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2609216056852187e-05, - "loss": 0.6775, - "step": 7382 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2607290560319718e-05, - "loss": 0.5694, - "step": 7383 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2605364960064765e-05, - "loss": 0.6782, - "step": 7384 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.260343925616393e-05, - "loss": 0.7389, - "step": 7385 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.260151344869382e-05, - "loss": 0.6105, - "step": 7386 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2599587537731053e-05, - "loss": 0.6765, - "step": 7387 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2597661523352235e-05, - "loss": 0.6097, - "step": 7388 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2595735405633993e-05, - "loss": 0.5467, - "step": 7389 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2593809184652955e-05, - "loss": 0.6913, - "step": 7390 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2591882860485738e-05, - "loss": 0.5221, - "step": 7391 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2589956433208981e-05, - "loss": 0.6892, - "step": 7392 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2588029902899324e-05, - "loss": 0.6024, - "step": 7393 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.25861032696334e-05, - "loss": 0.6082, - "step": 7394 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2584176533487859e-05, - "loss": 0.6851, - "step": 7395 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.258224969453935e-05, - "loss": 0.5878, - "step": 7396 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2580322752864522e-05, - "loss": 0.5706, - "step": 7397 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.257839570854004e-05, - "loss": 0.5975, - "step": 7398 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2576468561642555e-05, - "loss": 0.597, - "step": 7399 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2574541312248735e-05, - "loss": 0.5974, - "step": 7400 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2572613960435254e-05, - "loss": 0.6523, - "step": 7401 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2570686506278782e-05, - "loss": 0.577, - "step": 7402 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2568758949855999e-05, - "loss": 0.5826, - "step": 7403 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2566831291243583e-05, - "loss": 0.6365, - "step": 7404 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.256490353051822e-05, - "loss": 0.5796, - "step": 7405 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2562975667756603e-05, - "loss": 0.6863, - "step": 7406 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.256104770303542e-05, - "loss": 0.6245, - "step": 7407 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2559119636431375e-05, - "loss": 0.6666, - "step": 7408 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2557191468021166e-05, - "loss": 0.7266, - "step": 7409 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2555263197881498e-05, - "loss": 0.6456, - "step": 7410 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2553334826089084e-05, - "loss": 0.7006, - "step": 7411 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2551406352720636e-05, - "loss": 0.6147, - "step": 7412 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2549477777852874e-05, - "loss": 0.7464, - "step": 7413 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2547549101562517e-05, - "loss": 0.6696, - "step": 7414 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2545620323926292e-05, - "loss": 0.549, - "step": 7415 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.254369144502093e-05, - "loss": 0.5331, - "step": 7416 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2541762464923166e-05, - "loss": 0.6259, - "step": 7417 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2539833383709739e-05, - "loss": 0.5891, - "step": 7418 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2537904201457383e-05, - "loss": 0.7355, - "step": 7419 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2535974918242855e-05, - "loss": 0.6706, - "step": 7420 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2534045534142899e-05, - "loss": 0.7262, - "step": 7421 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2532116049234268e-05, - "loss": 0.6406, - "step": 7422 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2530186463593728e-05, - "loss": 0.6802, - "step": 7423 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.252825677729803e-05, - "loss": 0.579, - "step": 7424 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2526326990423952e-05, - "loss": 0.672, - "step": 7425 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2524397103048256e-05, - "loss": 0.5698, - "step": 7426 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2522467115247717e-05, - "loss": 0.6809, - "step": 7427 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2520537027099118e-05, - "loss": 0.6815, - "step": 7428 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2518606838679234e-05, - "loss": 0.7231, - "step": 7429 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2516676550064859e-05, - "loss": 0.6523, - "step": 7430 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2514746161332778e-05, - "loss": 0.6492, - "step": 7431 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2512815672559789e-05, - "loss": 0.5785, - "step": 7432 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2510885083822685e-05, - "loss": 0.5637, - "step": 7433 - }, - { - "epoch": 1.34, - "grad_norm": 0.0, - "learning_rate": 1.2508954395198272e-05, - "loss": 0.5815, - "step": 7434 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2507023606763355e-05, - "loss": 0.6424, - "step": 7435 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2505092718594748e-05, - "loss": 0.5608, - "step": 7436 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2503161730769255e-05, - "loss": 0.6357, - "step": 7437 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2501230643363703e-05, - "loss": 0.7371, - "step": 7438 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2499299456454911e-05, - "loss": 0.684, - "step": 7439 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2497368170119704e-05, - "loss": 0.5773, - "step": 7440 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2495436784434913e-05, - "loss": 0.6657, - "step": 7441 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2493505299477374e-05, - "loss": 0.6363, - "step": 7442 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2491573715323922e-05, - "loss": 0.5603, - "step": 7443 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.24896420320514e-05, - "loss": 0.6766, - "step": 7444 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2487710249736651e-05, - "loss": 0.7465, - "step": 7445 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2485778368456525e-05, - "loss": 0.6974, - "step": 7446 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2483846388287882e-05, - "loss": 0.7118, - "step": 7447 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2481914309307569e-05, - "loss": 0.7203, - "step": 7448 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2479982131592457e-05, - "loss": 0.5736, - "step": 7449 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2478049855219404e-05, - "loss": 0.6716, - "step": 7450 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2476117480265286e-05, - "loss": 0.4661, - "step": 7451 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2474185006806974e-05, - "loss": 0.6478, - "step": 7452 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2472252434921341e-05, - "loss": 0.6512, - "step": 7453 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2470319764685268e-05, - "loss": 0.6692, - "step": 7454 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.246838699617565e-05, - "loss": 0.7643, - "step": 7455 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2466454129469363e-05, - "loss": 0.6923, - "step": 7456 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2464521164643308e-05, - "loss": 0.7045, - "step": 7457 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2462588101774382e-05, - "loss": 0.6603, - "step": 7458 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2460654940939477e-05, - "loss": 0.6121, - "step": 7459 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2458721682215505e-05, - "loss": 0.6953, - "step": 7460 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2456788325679374e-05, - "loss": 0.592, - "step": 7461 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2454854871407993e-05, - "loss": 0.6249, - "step": 7462 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2452921319478281e-05, - "loss": 0.6618, - "step": 7463 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2450987669967157e-05, - "loss": 0.6734, - "step": 7464 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2449053922951542e-05, - "loss": 0.7195, - "step": 7465 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.244712007850837e-05, - "loss": 0.7213, - "step": 7466 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2445186136714566e-05, - "loss": 0.6299, - "step": 7467 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.244325209764707e-05, - "loss": 0.6316, - "step": 7468 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2441317961382823e-05, - "loss": 0.6159, - "step": 7469 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2439383727998762e-05, - "loss": 0.7219, - "step": 7470 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2437449397571837e-05, - "loss": 0.6203, - "step": 7471 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2435514970179001e-05, - "loss": 0.6553, - "step": 7472 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2433580445897204e-05, - "loss": 0.8027, - "step": 7473 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2431645824803414e-05, - "loss": 0.7222, - "step": 7474 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2429711106974582e-05, - "loss": 0.6869, - "step": 7475 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.242777629248768e-05, - "loss": 0.673, - "step": 7476 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2425841381419683e-05, - "loss": 0.6224, - "step": 7477 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2423906373847555e-05, - "loss": 0.795, - "step": 7478 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2421971269848281e-05, - "loss": 0.6309, - "step": 7479 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2420036069498841e-05, - "loss": 0.6152, - "step": 7480 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2418100772876223e-05, - "loss": 0.6947, - "step": 7481 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.241616538005741e-05, - "loss": 0.7961, - "step": 7482 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2414229891119401e-05, - "loss": 0.6258, - "step": 7483 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2412294306139188e-05, - "loss": 0.6178, - "step": 7484 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2410358625193779e-05, - "loss": 0.5685, - "step": 7485 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2408422848360171e-05, - "loss": 0.6104, - "step": 7486 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2406486975715379e-05, - "loss": 0.7206, - "step": 7487 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2404551007336412e-05, - "loss": 0.5692, - "step": 7488 - }, - { - "epoch": 1.35, - "grad_norm": 0.0, - "learning_rate": 1.2402614943300287e-05, - "loss": 0.7046, - "step": 7489 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2400678783684023e-05, - "loss": 0.6484, - "step": 7490 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2398742528564645e-05, - "loss": 0.5811, - "step": 7491 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2396806178019181e-05, - "loss": 0.5962, - "step": 7492 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.239486973212466e-05, - "loss": 0.5723, - "step": 7493 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2392933190958114e-05, - "loss": 0.6764, - "step": 7494 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.239099655459659e-05, - "loss": 0.6167, - "step": 7495 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.238905982311713e-05, - "loss": 0.6286, - "step": 7496 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2387122996596774e-05, - "loss": 0.5789, - "step": 7497 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.238518607511258e-05, - "loss": 0.6017, - "step": 7498 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2383249058741594e-05, - "loss": 0.6378, - "step": 7499 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2381311947560879e-05, - "loss": 0.5452, - "step": 7500 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2379374741647494e-05, - "loss": 0.6801, - "step": 7501 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2377437441078503e-05, - "loss": 0.699, - "step": 7502 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2375500045930984e-05, - "loss": 0.5792, - "step": 7503 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2373562556282002e-05, - "loss": 0.6233, - "step": 7504 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2371624972208634e-05, - "loss": 0.6674, - "step": 7505 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2369687293787962e-05, - "loss": 0.6332, - "step": 7506 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.236774952109707e-05, - "loss": 0.6597, - "step": 7507 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2365811654213047e-05, - "loss": 0.6862, - "step": 7508 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2363873693212982e-05, - "loss": 0.5308, - "step": 7509 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2361935638173974e-05, - "loss": 0.7665, - "step": 7510 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2359997489173118e-05, - "loss": 0.7904, - "step": 7511 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2358059246287519e-05, - "loss": 0.6371, - "step": 7512 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2356120909594282e-05, - "loss": 0.6821, - "step": 7513 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2354182479170523e-05, - "loss": 0.7063, - "step": 7514 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2352243955093353e-05, - "loss": 0.5152, - "step": 7515 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2350305337439882e-05, - "loss": 0.6598, - "step": 7516 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2348366626287244e-05, - "loss": 0.7273, - "step": 7517 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2346427821712557e-05, - "loss": 0.6899, - "step": 7518 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2344488923792952e-05, - "loss": 0.6736, - "step": 7519 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.234254993260556e-05, - "loss": 0.5669, - "step": 7520 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2340610848227519e-05, - "loss": 0.6613, - "step": 7521 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2338671670735968e-05, - "loss": 0.5958, - "step": 7522 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2336732400208057e-05, - "loss": 0.6752, - "step": 7523 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2334793036720923e-05, - "loss": 0.6217, - "step": 7524 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2332853580351722e-05, - "loss": 0.6008, - "step": 7525 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2330914031177612e-05, - "loss": 0.5682, - "step": 7526 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2328974389275747e-05, - "loss": 0.6781, - "step": 7527 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.232703465472329e-05, - "loss": 0.6304, - "step": 7528 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.232509482759741e-05, - "loss": 0.5731, - "step": 7529 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2323154907975274e-05, - "loss": 0.68, - "step": 7530 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2321214895934056e-05, - "loss": 0.6563, - "step": 7531 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2319274791550932e-05, - "loss": 0.5783, - "step": 7532 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2317334594903085e-05, - "loss": 0.5457, - "step": 7533 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2315394306067701e-05, - "loss": 0.6171, - "step": 7534 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2313453925121958e-05, - "loss": 0.6836, - "step": 7535 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.231151345214306e-05, - "loss": 0.599, - "step": 7536 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2309572887208194e-05, - "loss": 0.6621, - "step": 7537 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2307632230394564e-05, - "loss": 0.641, - "step": 7538 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.230569148177937e-05, - "loss": 0.6203, - "step": 7539 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2303750641439818e-05, - "loss": 0.5839, - "step": 7540 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.230180970945312e-05, - "loss": 0.6867, - "step": 7541 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.229986868589649e-05, - "loss": 0.6818, - "step": 7542 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2297927570847142e-05, - "loss": 0.6271, - "step": 7543 - }, - { - "epoch": 1.36, - "grad_norm": 0.0, - "learning_rate": 1.2295986364382297e-05, - "loss": 0.5732, - "step": 7544 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2294045066579186e-05, - "loss": 0.5667, - "step": 7545 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2292103677515027e-05, - "loss": 0.6143, - "step": 7546 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.229016219726706e-05, - "loss": 0.6464, - "step": 7547 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2288220625912516e-05, - "loss": 0.5651, - "step": 7548 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2286278963528639e-05, - "loss": 0.7113, - "step": 7549 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2284337210192667e-05, - "loss": 0.5442, - "step": 7550 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2282395365981843e-05, - "loss": 0.6797, - "step": 7551 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2280453430973425e-05, - "loss": 0.6259, - "step": 7552 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2278511405244665e-05, - "loss": 0.6264, - "step": 7553 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2276569288872815e-05, - "loss": 0.8161, - "step": 7554 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.227462708193514e-05, - "loss": 0.7066, - "step": 7555 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2272684784508902e-05, - "loss": 0.644, - "step": 7556 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.227074239667137e-05, - "loss": 0.7241, - "step": 7557 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2268799918499816e-05, - "loss": 0.6244, - "step": 7558 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2266857350071512e-05, - "loss": 0.6691, - "step": 7559 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2264914691463742e-05, - "loss": 0.6142, - "step": 7560 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2262971942753788e-05, - "loss": 0.7697, - "step": 7561 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2261029104018928e-05, - "loss": 0.6658, - "step": 7562 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2259086175336461e-05, - "loss": 0.6246, - "step": 7563 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2257143156783675e-05, - "loss": 0.6499, - "step": 7564 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2255200048437865e-05, - "loss": 0.6869, - "step": 7565 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2253256850376337e-05, - "loss": 0.721, - "step": 7566 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2251313562676389e-05, - "loss": 0.6792, - "step": 7567 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2249370185415334e-05, - "loss": 0.6078, - "step": 7568 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2247426718670476e-05, - "loss": 0.6016, - "step": 7569 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2245483162519134e-05, - "loss": 0.6101, - "step": 7570 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2243539517038625e-05, - "loss": 0.7086, - "step": 7571 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2241595782306272e-05, - "loss": 0.641, - "step": 7572 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.22396519583994e-05, - "loss": 0.5892, - "step": 7573 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2237708045395333e-05, - "loss": 0.5316, - "step": 7574 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2235764043371408e-05, - "loss": 0.6674, - "step": 7575 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2233819952404959e-05, - "loss": 0.6142, - "step": 7576 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2231875772573326e-05, - "loss": 0.6886, - "step": 7577 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2229931503953849e-05, - "loss": 0.6974, - "step": 7578 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2227987146623878e-05, - "loss": 0.7044, - "step": 7579 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2226042700660766e-05, - "loss": 0.5776, - "step": 7580 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2224098166141855e-05, - "loss": 0.6199, - "step": 7581 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2222153543144513e-05, - "loss": 0.6169, - "step": 7582 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2220208831746097e-05, - "loss": 0.5722, - "step": 7583 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2218264032023967e-05, - "loss": 0.6686, - "step": 7584 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2216319144055497e-05, - "loss": 0.6821, - "step": 7585 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2214374167918051e-05, - "loss": 0.5606, - "step": 7586 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2212429103689013e-05, - "loss": 0.6329, - "step": 7587 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2210483951445751e-05, - "loss": 0.6696, - "step": 7588 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2208538711265647e-05, - "loss": 0.6716, - "step": 7589 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2206593383226095e-05, - "loss": 0.6657, - "step": 7590 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2204647967404478e-05, - "loss": 0.6696, - "step": 7591 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2202702463878186e-05, - "loss": 0.5734, - "step": 7592 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2200756872724617e-05, - "loss": 0.637, - "step": 7593 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.219881119402117e-05, - "loss": 0.5944, - "step": 7594 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2196865427845248e-05, - "loss": 0.5723, - "step": 7595 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2194919574274253e-05, - "loss": 0.7113, - "step": 7596 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.21929736333856e-05, - "loss": 0.6176, - "step": 7597 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2191027605256696e-05, - "loss": 0.5196, - "step": 7598 - }, - { - "epoch": 1.37, - "grad_norm": 0.0, - "learning_rate": 1.2189081489964964e-05, - "loss": 0.6153, - "step": 7599 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2187135287587818e-05, - "loss": 0.7559, - "step": 7600 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2185188998202684e-05, - "loss": 0.7443, - "step": 7601 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2183242621886988e-05, - "loss": 0.6897, - "step": 7602 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.218129615871816e-05, - "loss": 0.6669, - "step": 7603 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2179349608773636e-05, - "loss": 0.6421, - "step": 7604 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2177402972130847e-05, - "loss": 0.6664, - "step": 7605 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2175456248867243e-05, - "loss": 0.6302, - "step": 7606 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2173509439060261e-05, - "loss": 0.7265, - "step": 7607 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2171562542787347e-05, - "loss": 0.6701, - "step": 7608 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2169615560125958e-05, - "loss": 0.6935, - "step": 7609 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2167668491153546e-05, - "loss": 0.5324, - "step": 7610 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2165721335947567e-05, - "loss": 0.6336, - "step": 7611 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2163774094585482e-05, - "loss": 0.665, - "step": 7612 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2161826767144756e-05, - "loss": 0.6932, - "step": 7613 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2159879353702862e-05, - "loss": 0.6876, - "step": 7614 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2157931854337264e-05, - "loss": 0.6369, - "step": 7615 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2155984269125439e-05, - "loss": 0.6404, - "step": 7616 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.215403659814487e-05, - "loss": 0.5988, - "step": 7617 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.215208884147303e-05, - "loss": 0.6968, - "step": 7618 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2150140999187412e-05, - "loss": 0.7338, - "step": 7619 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2148193071365503e-05, - "loss": 0.6445, - "step": 7620 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2146245058084794e-05, - "loss": 0.5901, - "step": 7621 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2144296959422777e-05, - "loss": 0.6173, - "step": 7622 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2142348775456956e-05, - "loss": 0.5864, - "step": 7623 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2140400506264826e-05, - "loss": 0.6618, - "step": 7624 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2138452151923903e-05, - "loss": 0.6157, - "step": 7625 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2136503712511689e-05, - "loss": 0.5343, - "step": 7626 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2134555188105691e-05, - "loss": 0.6316, - "step": 7627 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.213260657878344e-05, - "loss": 0.6273, - "step": 7628 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2130657884622441e-05, - "loss": 0.6805, - "step": 7629 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2128709105700223e-05, - "loss": 0.8006, - "step": 7630 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2126760242094308e-05, - "loss": 0.6269, - "step": 7631 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2124811293882226e-05, - "loss": 0.6744, - "step": 7632 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2122862261141518e-05, - "loss": 0.6002, - "step": 7633 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2120913143949707e-05, - "loss": 0.5826, - "step": 7634 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2118963942384336e-05, - "loss": 0.783, - "step": 7635 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2117014656522955e-05, - "loss": 0.7349, - "step": 7636 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2115065286443098e-05, - "loss": 0.5577, - "step": 7637 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2113115832222324e-05, - "loss": 0.7111, - "step": 7638 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2111166293938182e-05, - "loss": 0.6145, - "step": 7639 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.210921667166823e-05, - "loss": 0.5953, - "step": 7640 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2107266965490023e-05, - "loss": 0.6976, - "step": 7641 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2105317175481126e-05, - "loss": 0.6895, - "step": 7642 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2103367301719104e-05, - "loss": 0.5937, - "step": 7643 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2101417344281534e-05, - "loss": 0.612, - "step": 7644 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2099467303245977e-05, - "loss": 0.7281, - "step": 7645 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2097517178690012e-05, - "loss": 0.732, - "step": 7646 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2095566970691223e-05, - "loss": 0.5614, - "step": 7647 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.209361667932719e-05, - "loss": 0.7055, - "step": 7648 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.20916663046755e-05, - "loss": 0.6553, - "step": 7649 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2089715846813742e-05, - "loss": 0.5347, - "step": 7650 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2087765305819503e-05, - "loss": 0.57, - "step": 7651 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2085814681770389e-05, - "loss": 0.6396, - "step": 7652 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2083863974743993e-05, - "loss": 0.6624, - "step": 7653 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2081913184817913e-05, - "loss": 0.5941, - "step": 7654 - }, - { - "epoch": 1.38, - "grad_norm": 0.0, - "learning_rate": 1.2079962312069766e-05, - "loss": 0.6639, - "step": 7655 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.207801135657715e-05, - "loss": 0.784, - "step": 7656 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2076060318417686e-05, - "loss": 0.7298, - "step": 7657 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2074109197668985e-05, - "loss": 0.7036, - "step": 7658 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2072157994408669e-05, - "loss": 0.6248, - "step": 7659 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2070206708714356e-05, - "loss": 0.6888, - "step": 7660 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2068255340663674e-05, - "loss": 0.7401, - "step": 7661 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.206630389033425e-05, - "loss": 0.6558, - "step": 7662 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2064352357803722e-05, - "loss": 0.654, - "step": 7663 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2062400743149713e-05, - "loss": 0.7153, - "step": 7664 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2060449046449875e-05, - "loss": 0.7157, - "step": 7665 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2058497267781843e-05, - "loss": 0.673, - "step": 7666 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2056545407223263e-05, - "loss": 0.5895, - "step": 7667 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2054593464851785e-05, - "loss": 0.5813, - "step": 7668 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2052641440745059e-05, - "loss": 0.7145, - "step": 7669 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2050689334980739e-05, - "loss": 0.7371, - "step": 7670 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2048737147636484e-05, - "loss": 0.6587, - "step": 7671 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2046784878789958e-05, - "loss": 0.6476, - "step": 7672 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.204483252851882e-05, - "loss": 0.7057, - "step": 7673 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2042880096900743e-05, - "loss": 0.6673, - "step": 7674 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2040927584013394e-05, - "loss": 0.673, - "step": 7675 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2038974989934451e-05, - "loss": 0.6158, - "step": 7676 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.203702231474159e-05, - "loss": 0.7041, - "step": 7677 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2035069558512493e-05, - "loss": 0.5664, - "step": 7678 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.203311672132484e-05, - "loss": 0.6208, - "step": 7679 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2031163803256323e-05, - "loss": 0.6617, - "step": 7680 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2029210804384627e-05, - "loss": 0.5622, - "step": 7681 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2027257724787455e-05, - "loss": 0.7324, - "step": 7682 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2025304564542493e-05, - "loss": 0.7189, - "step": 7683 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2023351323727446e-05, - "loss": 0.634, - "step": 7684 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.202139800242002e-05, - "loss": 0.6251, - "step": 7685 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2019444600697917e-05, - "loss": 0.7124, - "step": 7686 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2017491118638848e-05, - "loss": 0.6186, - "step": 7687 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2015537556320526e-05, - "loss": 0.6611, - "step": 7688 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2013583913820668e-05, - "loss": 0.5771, - "step": 7689 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2011630191216992e-05, - "loss": 0.6659, - "step": 7690 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.200967638858722e-05, - "loss": 0.6514, - "step": 7691 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2007722506009079e-05, - "loss": 0.6273, - "step": 7692 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2005768543560299e-05, - "loss": 0.5296, - "step": 7693 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.200381450131861e-05, - "loss": 0.6278, - "step": 7694 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.2001860379361746e-05, - "loss": 0.7523, - "step": 7695 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1999906177767447e-05, - "loss": 0.6688, - "step": 7696 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1997951896613454e-05, - "loss": 0.5544, - "step": 7697 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1995997535977513e-05, - "loss": 0.5497, - "step": 7698 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1994043095937369e-05, - "loss": 0.6173, - "step": 7699 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1992088576570772e-05, - "loss": 0.6759, - "step": 7700 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1990133977955487e-05, - "loss": 0.5971, - "step": 7701 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1988179300169256e-05, - "loss": 0.5931, - "step": 7702 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1986224543289849e-05, - "loss": 0.5985, - "step": 7703 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1984269707395026e-05, - "loss": 0.6802, - "step": 7704 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1982314792562558e-05, - "loss": 0.5884, - "step": 7705 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.198035979887021e-05, - "loss": 0.7003, - "step": 7706 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1978404726395757e-05, - "loss": 0.6877, - "step": 7707 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1976449575216973e-05, - "loss": 0.6666, - "step": 7708 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1974494345411637e-05, - "loss": 0.6752, - "step": 7709 - }, - { - "epoch": 1.39, - "grad_norm": 0.0, - "learning_rate": 1.1972539037057538e-05, - "loss": 0.4851, - "step": 7710 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1970583650232454e-05, - "loss": 0.588, - "step": 7711 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1968628185014182e-05, - "loss": 0.6406, - "step": 7712 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1966672641480501e-05, - "loss": 0.5857, - "step": 7713 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1964717019709217e-05, - "loss": 0.771, - "step": 7714 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1962761319778124e-05, - "loss": 0.7305, - "step": 7715 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1960805541765024e-05, - "loss": 0.5638, - "step": 7716 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1958849685747717e-05, - "loss": 0.6176, - "step": 7717 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1956893751804018e-05, - "loss": 0.5486, - "step": 7718 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.195493774001173e-05, - "loss": 0.7684, - "step": 7719 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1952981650448674e-05, - "loss": 0.6193, - "step": 7720 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1951025483192658e-05, - "loss": 0.6827, - "step": 7721 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1949069238321508e-05, - "loss": 0.6658, - "step": 7722 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1947112915913045e-05, - "loss": 0.6567, - "step": 7723 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.194515651604509e-05, - "loss": 0.6902, - "step": 7724 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1943200038795482e-05, - "loss": 0.6281, - "step": 7725 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1941243484242047e-05, - "loss": 0.5917, - "step": 7726 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.193928685246262e-05, - "loss": 0.6714, - "step": 7727 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.193733014353504e-05, - "loss": 0.6462, - "step": 7728 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1935373357537146e-05, - "loss": 0.6056, - "step": 7729 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1933416494546785e-05, - "loss": 0.662, - "step": 7730 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1931459554641806e-05, - "loss": 0.5445, - "step": 7731 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1929502537900056e-05, - "loss": 0.6485, - "step": 7732 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1927545444399389e-05, - "loss": 0.5725, - "step": 7733 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1925588274217664e-05, - "loss": 0.4931, - "step": 7734 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1923631027432736e-05, - "loss": 0.6031, - "step": 7735 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1921673704122473e-05, - "loss": 0.6944, - "step": 7736 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1919716304364736e-05, - "loss": 0.6926, - "step": 7737 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1917758828237394e-05, - "loss": 0.5687, - "step": 7738 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1915801275818325e-05, - "loss": 0.591, - "step": 7739 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1913843647185397e-05, - "loss": 0.6408, - "step": 7740 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.191188594241649e-05, - "loss": 0.554, - "step": 7741 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1909928161589488e-05, - "loss": 0.6604, - "step": 7742 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1907970304782266e-05, - "loss": 0.6505, - "step": 7743 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1906012372072722e-05, - "loss": 0.6382, - "step": 7744 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1904054363538737e-05, - "loss": 0.6689, - "step": 7745 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1902096279258211e-05, - "loss": 0.597, - "step": 7746 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1900138119309035e-05, - "loss": 0.6854, - "step": 7747 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1898179883769108e-05, - "loss": 0.6498, - "step": 7748 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1896221572716334e-05, - "loss": 0.5138, - "step": 7749 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.189426318622862e-05, - "loss": 0.656, - "step": 7750 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1892304724383868e-05, - "loss": 0.5655, - "step": 7751 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1890346187259994e-05, - "loss": 0.7589, - "step": 7752 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.188838757493491e-05, - "loss": 0.7152, - "step": 7753 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1886428887486535e-05, - "loss": 0.676, - "step": 7754 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1884470124992784e-05, - "loss": 0.5753, - "step": 7755 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1882511287531584e-05, - "loss": 0.6927, - "step": 7756 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.188055237518086e-05, - "loss": 0.6939, - "step": 7757 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1878593388018545e-05, - "loss": 0.6416, - "step": 7758 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1876634326122562e-05, - "loss": 0.6779, - "step": 7759 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1874675189570852e-05, - "loss": 0.6679, - "step": 7760 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.187271597844135e-05, - "loss": 0.636, - "step": 7761 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1870756692812003e-05, - "loss": 0.6655, - "step": 7762 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1868797332760746e-05, - "loss": 0.6354, - "step": 7763 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1866837898365533e-05, - "loss": 0.6906, - "step": 7764 - }, - { - "epoch": 1.4, - "grad_norm": 0.0, - "learning_rate": 1.1864878389704306e-05, - "loss": 0.6209, - "step": 7765 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1862918806855026e-05, - "loss": 0.6999, - "step": 7766 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1860959149895641e-05, - "loss": 0.578, - "step": 7767 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1858999418904113e-05, - "loss": 0.6715, - "step": 7768 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1857039613958408e-05, - "loss": 0.6557, - "step": 7769 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1855079735136482e-05, - "loss": 0.6075, - "step": 7770 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1853119782516306e-05, - "loss": 0.5849, - "step": 7771 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1851159756175852e-05, - "loss": 0.6708, - "step": 7772 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1849199656193091e-05, - "loss": 0.7322, - "step": 7773 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1847239482646e-05, - "loss": 0.6467, - "step": 7774 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1845279235612557e-05, - "loss": 0.6057, - "step": 7775 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1843318915170747e-05, - "loss": 0.5965, - "step": 7776 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1841358521398554e-05, - "loss": 0.5686, - "step": 7777 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1839398054373959e-05, - "loss": 0.7189, - "step": 7778 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1837437514174961e-05, - "loss": 0.6397, - "step": 7779 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1835476900879551e-05, - "loss": 0.5792, - "step": 7780 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1833516214565727e-05, - "loss": 0.6417, - "step": 7781 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1831555455311487e-05, - "loss": 0.6596, - "step": 7782 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1829594623194833e-05, - "loss": 0.5543, - "step": 7783 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.182763371829377e-05, - "loss": 0.5004, - "step": 7784 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1825672740686307e-05, - "loss": 0.6393, - "step": 7785 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1823711690450452e-05, - "loss": 0.572, - "step": 7786 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1821750567664225e-05, - "loss": 0.7289, - "step": 7787 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1819789372405641e-05, - "loss": 0.5737, - "step": 7788 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1817828104752716e-05, - "loss": 0.645, - "step": 7789 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1815866764783475e-05, - "loss": 0.6517, - "step": 7790 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1813905352575942e-05, - "loss": 0.5526, - "step": 7791 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.181194386820815e-05, - "loss": 0.6826, - "step": 7792 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1809982311758123e-05, - "loss": 0.6651, - "step": 7793 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.18080206833039e-05, - "loss": 0.7209, - "step": 7794 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1806058982923521e-05, - "loss": 0.7991, - "step": 7795 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1804097210695019e-05, - "loss": 0.5837, - "step": 7796 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1802135366696437e-05, - "loss": 0.6949, - "step": 7797 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1800173451005826e-05, - "loss": 0.5695, - "step": 7798 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1798211463701228e-05, - "loss": 0.6057, - "step": 7799 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.17962494048607e-05, - "loss": 0.7885, - "step": 7800 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1794287274562295e-05, - "loss": 0.6248, - "step": 7801 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1792325072884064e-05, - "loss": 0.7262, - "step": 7802 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1790362799904076e-05, - "loss": 0.6709, - "step": 7803 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1788400455700385e-05, - "loss": 0.5956, - "step": 7804 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1786438040351062e-05, - "loss": 0.6668, - "step": 7805 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1784475553934174e-05, - "loss": 0.7439, - "step": 7806 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.178251299652779e-05, - "loss": 0.7147, - "step": 7807 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1780550368209986e-05, - "loss": 0.6226, - "step": 7808 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1778587669058839e-05, - "loss": 0.5881, - "step": 7809 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1776624899152427e-05, - "loss": 0.6081, - "step": 7810 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.177466205856883e-05, - "loss": 0.6503, - "step": 7811 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1772699147386142e-05, - "loss": 0.6343, - "step": 7812 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1770736165682439e-05, - "loss": 0.6821, - "step": 7813 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1768773113535823e-05, - "loss": 0.6864, - "step": 7814 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1766809991024381e-05, - "loss": 0.6517, - "step": 7815 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1764846798226208e-05, - "loss": 0.6663, - "step": 7816 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1762883535219407e-05, - "loss": 0.6319, - "step": 7817 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1760920202082082e-05, - "loss": 0.5918, - "step": 7818 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1758956798892331e-05, - "loss": 0.623, - "step": 7819 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.175699332572827e-05, - "loss": 0.6235, - "step": 7820 - }, - { - "epoch": 1.41, - "grad_norm": 0.0, - "learning_rate": 1.1755029782667999e-05, - "loss": 0.6268, - "step": 7821 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1753066169789643e-05, - "loss": 0.6338, - "step": 7822 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1751102487171307e-05, - "loss": 0.7305, - "step": 7823 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1749138734891114e-05, - "loss": 0.7486, - "step": 7824 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1747174913027185e-05, - "loss": 0.6297, - "step": 7825 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1745211021657647e-05, - "loss": 0.5681, - "step": 7826 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1743247060860627e-05, - "loss": 0.6457, - "step": 7827 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.174128303071425e-05, - "loss": 0.6807, - "step": 7828 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1739318931296653e-05, - "loss": 0.599, - "step": 7829 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1737354762685967e-05, - "loss": 0.5922, - "step": 7830 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1735390524960333e-05, - "loss": 0.6107, - "step": 7831 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1733426218197889e-05, - "loss": 0.6491, - "step": 7832 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1731461842476785e-05, - "loss": 0.5521, - "step": 7833 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.172949739787516e-05, - "loss": 0.6759, - "step": 7834 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1727532884471165e-05, - "loss": 0.7508, - "step": 7835 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1725568302342953e-05, - "loss": 0.5705, - "step": 7836 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1723603651568677e-05, - "loss": 0.6126, - "step": 7837 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1721638932226498e-05, - "loss": 0.6371, - "step": 7838 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.171967414439457e-05, - "loss": 0.5705, - "step": 7839 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.171770928815106e-05, - "loss": 0.6762, - "step": 7840 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1715744363574133e-05, - "loss": 0.6218, - "step": 7841 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1713779370741956e-05, - "loss": 0.4964, - "step": 7842 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1711814309732697e-05, - "loss": 0.606, - "step": 7843 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1709849180624533e-05, - "loss": 0.6262, - "step": 7844 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.170788398349564e-05, - "loss": 0.626, - "step": 7845 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1705918718424196e-05, - "loss": 0.6294, - "step": 7846 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1703953385488384e-05, - "loss": 0.6343, - "step": 7847 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1701987984766388e-05, - "loss": 0.6697, - "step": 7848 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1700022516336394e-05, - "loss": 0.6591, - "step": 7849 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.169805698027659e-05, - "loss": 0.6345, - "step": 7850 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.169609137666517e-05, - "loss": 0.603, - "step": 7851 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1694125705580333e-05, - "loss": 0.6851, - "step": 7852 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1692159967100272e-05, - "loss": 0.6146, - "step": 7853 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1690194161303185e-05, - "loss": 0.638, - "step": 7854 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1688228288267283e-05, - "loss": 0.6011, - "step": 7855 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1686262348070768e-05, - "loss": 0.6328, - "step": 7856 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1684296340791844e-05, - "loss": 0.6835, - "step": 7857 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.168233026650873e-05, - "loss": 0.6436, - "step": 7858 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1680364125299633e-05, - "loss": 0.6473, - "step": 7859 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1678397917242776e-05, - "loss": 0.7043, - "step": 7860 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1676431642416373e-05, - "loss": 0.5934, - "step": 7861 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1674465300898645e-05, - "loss": 0.5367, - "step": 7862 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.167249889276782e-05, - "loss": 0.6297, - "step": 7863 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1670532418102126e-05, - "loss": 0.66, - "step": 7864 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.166856587697979e-05, - "loss": 0.6751, - "step": 7865 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1666599269479045e-05, - "loss": 0.6257, - "step": 7866 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1664632595678125e-05, - "loss": 0.6755, - "step": 7867 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1662665855655269e-05, - "loss": 0.6463, - "step": 7868 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1660699049488717e-05, - "loss": 0.596, - "step": 7869 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1658732177256711e-05, - "loss": 0.601, - "step": 7870 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.16567652390375e-05, - "loss": 0.7124, - "step": 7871 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1654798234909326e-05, - "loss": 0.7235, - "step": 7872 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1652831164950445e-05, - "loss": 0.6399, - "step": 7873 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.165086402923911e-05, - "loss": 0.611, - "step": 7874 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.1648896827853575e-05, - "loss": 0.7051, - "step": 7875 - }, - { - "epoch": 1.42, - "grad_norm": 0.0, - "learning_rate": 1.16469295608721e-05, - "loss": 0.6534, - "step": 7876 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1644962228372944e-05, - "loss": 0.6449, - "step": 7877 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1642994830434373e-05, - "loss": 0.6402, - "step": 7878 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1641027367134657e-05, - "loss": 0.6808, - "step": 7879 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1639059838552057e-05, - "loss": 0.6558, - "step": 7880 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1637092244764846e-05, - "loss": 0.6984, - "step": 7881 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1635124585851307e-05, - "loss": 0.6225, - "step": 7882 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1633156861889707e-05, - "loss": 0.6154, - "step": 7883 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.163118907295833e-05, - "loss": 0.628, - "step": 7884 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.162922121913546e-05, - "loss": 0.6035, - "step": 7885 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1627253300499378e-05, - "loss": 0.6959, - "step": 7886 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1625285317128369e-05, - "loss": 0.7238, - "step": 7887 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1623317269100728e-05, - "loss": 0.6465, - "step": 7888 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1621349156494745e-05, - "loss": 0.7015, - "step": 7889 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1619380979388718e-05, - "loss": 0.61, - "step": 7890 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1617412737860935e-05, - "loss": 0.6602, - "step": 7891 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1615444431989706e-05, - "loss": 0.6117, - "step": 7892 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.161347606185333e-05, - "loss": 0.6202, - "step": 7893 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1611507627530113e-05, - "loss": 0.6255, - "step": 7894 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1609539129098361e-05, - "loss": 0.657, - "step": 7895 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1607570566636387e-05, - "loss": 0.621, - "step": 7896 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1605601940222498e-05, - "loss": 0.7147, - "step": 7897 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1603633249935018e-05, - "loss": 0.5949, - "step": 7898 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1601664495852258e-05, - "loss": 0.7494, - "step": 7899 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1599695678052539e-05, - "loss": 0.6302, - "step": 7900 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1597726796614189e-05, - "loss": 0.7056, - "step": 7901 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1595757851615525e-05, - "loss": 0.6878, - "step": 7902 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1593788843134883e-05, - "loss": 0.6688, - "step": 7903 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.159181977125059e-05, - "loss": 0.7117, - "step": 7904 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.158985063604098e-05, - "loss": 0.5883, - "step": 7905 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1587881437584387e-05, - "loss": 0.6013, - "step": 7906 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1585912175959153e-05, - "loss": 0.5893, - "step": 7907 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1583942851243613e-05, - "loss": 0.6892, - "step": 7908 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1581973463516113e-05, - "loss": 0.7119, - "step": 7909 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1580004012854999e-05, - "loss": 0.6901, - "step": 7910 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1578034499338619e-05, - "loss": 0.6433, - "step": 7911 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1576064923045326e-05, - "loss": 0.6234, - "step": 7912 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1574095284053466e-05, - "loss": 0.6886, - "step": 7913 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1572125582441402e-05, - "loss": 0.6521, - "step": 7914 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1570155818287487e-05, - "loss": 0.5224, - "step": 7915 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1568185991670082e-05, - "loss": 0.6518, - "step": 7916 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1566216102667557e-05, - "loss": 0.7769, - "step": 7917 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.156424615135827e-05, - "loss": 0.5339, - "step": 7918 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1562276137820589e-05, - "loss": 0.7467, - "step": 7919 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1560306062132894e-05, - "loss": 0.5742, - "step": 7920 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1558335924373543e-05, - "loss": 0.6621, - "step": 7921 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1556365724620924e-05, - "loss": 0.5868, - "step": 7922 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1554395462953406e-05, - "loss": 0.613, - "step": 7923 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1552425139449377e-05, - "loss": 0.6996, - "step": 7924 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1550454754187218e-05, - "loss": 0.5977, - "step": 7925 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.154848430724531e-05, - "loss": 0.5601, - "step": 7926 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1546513798702041e-05, - "loss": 0.6217, - "step": 7927 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1544543228635812e-05, - "loss": 0.6666, - "step": 7928 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1542572597124999e-05, - "loss": 0.596, - "step": 7929 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.154060190424801e-05, - "loss": 0.6232, - "step": 7930 - }, - { - "epoch": 1.43, - "grad_norm": 0.0, - "learning_rate": 1.1538631150083237e-05, - "loss": 0.6233, - "step": 7931 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.153666033470908e-05, - "loss": 0.6193, - "step": 7932 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1534689458203945e-05, - "loss": 0.6197, - "step": 7933 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1532718520646231e-05, - "loss": 0.6921, - "step": 7934 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1530747522114351e-05, - "loss": 0.6705, - "step": 7935 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1528776462686712e-05, - "loss": 0.63, - "step": 7936 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1526805342441726e-05, - "loss": 0.718, - "step": 7937 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1524834161457805e-05, - "loss": 0.6338, - "step": 7938 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1522862919813376e-05, - "loss": 0.6304, - "step": 7939 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1520891617586845e-05, - "loss": 0.688, - "step": 7940 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.151892025485664e-05, - "loss": 0.6372, - "step": 7941 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.151694883170119e-05, - "loss": 0.7671, - "step": 7942 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1514977348198913e-05, - "loss": 0.6123, - "step": 7943 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1513005804428244e-05, - "loss": 0.5887, - "step": 7944 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.151103420046761e-05, - "loss": 0.651, - "step": 7945 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1509062536395446e-05, - "loss": 0.5925, - "step": 7946 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1507090812290193e-05, - "loss": 0.7206, - "step": 7947 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1505119028230283e-05, - "loss": 0.5871, - "step": 7948 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.150314718429416e-05, - "loss": 0.6269, - "step": 7949 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1501175280560266e-05, - "loss": 0.7344, - "step": 7950 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.149920331710705e-05, - "loss": 0.5636, - "step": 7951 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1497231294012956e-05, - "loss": 0.6428, - "step": 7952 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1495259211356438e-05, - "loss": 0.6483, - "step": 7953 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1493287069215946e-05, - "loss": 0.5776, - "step": 7954 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1491314867669936e-05, - "loss": 0.6526, - "step": 7955 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1489342606796865e-05, - "loss": 0.6236, - "step": 7956 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1487370286675195e-05, - "loss": 0.6, - "step": 7957 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.148539790738339e-05, - "loss": 0.6045, - "step": 7958 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1483425468999904e-05, - "loss": 0.5896, - "step": 7959 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1481452971603217e-05, - "loss": 0.5879, - "step": 7960 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1479480415271793e-05, - "loss": 0.617, - "step": 7961 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1477507800084102e-05, - "loss": 0.6615, - "step": 7962 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.147553512611862e-05, - "loss": 0.652, - "step": 7963 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1473562393453822e-05, - "loss": 0.6362, - "step": 7964 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.147158960216819e-05, - "loss": 0.5549, - "step": 7965 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1469616752340203e-05, - "loss": 0.5648, - "step": 7966 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.146764384404834e-05, - "loss": 0.6423, - "step": 7967 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1465670877371093e-05, - "loss": 0.7158, - "step": 7968 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1463697852386948e-05, - "loss": 0.731, - "step": 7969 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1461724769174393e-05, - "loss": 0.6515, - "step": 7970 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1459751627811926e-05, - "loss": 0.6176, - "step": 7971 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1457778428378037e-05, - "loss": 0.6244, - "step": 7972 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1455805170951223e-05, - "loss": 0.5141, - "step": 7973 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.145383185560999e-05, - "loss": 0.7555, - "step": 7974 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1451858482432829e-05, - "loss": 0.6631, - "step": 7975 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1449885051498256e-05, - "loss": 0.6981, - "step": 7976 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.144791156288477e-05, - "loss": 0.6354, - "step": 7977 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.144593801667088e-05, - "loss": 0.6697, - "step": 7978 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.14439644129351e-05, - "loss": 0.7367, - "step": 7979 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.144199075175594e-05, - "loss": 0.6666, - "step": 7980 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1440017033211922e-05, - "loss": 0.5865, - "step": 7981 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1438043257381556e-05, - "loss": 0.5432, - "step": 7982 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1436069424343365e-05, - "loss": 0.7018, - "step": 7983 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1434095534175872e-05, - "loss": 0.6548, - "step": 7984 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1432121586957608e-05, - "loss": 0.626, - "step": 7985 - }, - { - "epoch": 1.44, - "grad_norm": 0.0, - "learning_rate": 1.1430147582767088e-05, - "loss": 0.69, - "step": 7986 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1428173521682848e-05, - "loss": 0.519, - "step": 7987 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.142619940378342e-05, - "loss": 0.6767, - "step": 7988 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1424225229147337e-05, - "loss": 0.6798, - "step": 7989 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1422250997853132e-05, - "loss": 0.7194, - "step": 7990 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.142027670997935e-05, - "loss": 0.6647, - "step": 7991 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1418302365604524e-05, - "loss": 0.6314, - "step": 7992 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1416327964807202e-05, - "loss": 0.6736, - "step": 7993 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1414353507665922e-05, - "loss": 0.6512, - "step": 7994 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1412378994259243e-05, - "loss": 0.8143, - "step": 7995 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1410404424665709e-05, - "loss": 0.6147, - "step": 7996 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1408429798963865e-05, - "loss": 0.6265, - "step": 7997 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1406455117232274e-05, - "loss": 0.65, - "step": 7998 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1404480379549488e-05, - "loss": 0.6691, - "step": 7999 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1402505585994068e-05, - "loss": 0.6683, - "step": 8000 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1400530736644573e-05, - "loss": 0.7308, - "step": 8001 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1398555831579563e-05, - "loss": 0.7214, - "step": 8002 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.139658087087761e-05, - "loss": 0.6253, - "step": 8003 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1394605854617278e-05, - "loss": 0.5876, - "step": 8004 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.139263078287713e-05, - "loss": 0.6747, - "step": 8005 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.139065565573575e-05, - "loss": 0.7234, - "step": 8006 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1388680473271702e-05, - "loss": 0.6526, - "step": 8007 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1386705235563569e-05, - "loss": 0.6846, - "step": 8008 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1384729942689922e-05, - "loss": 0.6132, - "step": 8009 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1382754594729349e-05, - "loss": 0.6515, - "step": 8010 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.138077919176043e-05, - "loss": 0.6874, - "step": 8011 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1378803733861748e-05, - "loss": 0.6332, - "step": 8012 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1376828221111889e-05, - "loss": 0.6995, - "step": 8013 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1374852653589447e-05, - "loss": 0.6487, - "step": 8014 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.137287703137301e-05, - "loss": 0.707, - "step": 8015 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1370901354541176e-05, - "loss": 0.5996, - "step": 8016 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1368925623172535e-05, - "loss": 0.5256, - "step": 8017 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.136694983734569e-05, - "loss": 0.5888, - "step": 8018 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1364973997139237e-05, - "loss": 0.7795, - "step": 8019 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1362998102631781e-05, - "loss": 0.6358, - "step": 8020 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1361022153901921e-05, - "loss": 0.6231, - "step": 8021 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1359046151028272e-05, - "loss": 0.5502, - "step": 8022 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1357070094089443e-05, - "loss": 0.6194, - "step": 8023 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1355093983164034e-05, - "loss": 0.6762, - "step": 8024 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1353117818330668e-05, - "loss": 0.6954, - "step": 8025 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1351141599667957e-05, - "loss": 0.7561, - "step": 8026 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1349165327254519e-05, - "loss": 0.6701, - "step": 8027 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1347189001168972e-05, - "loss": 0.6476, - "step": 8028 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1345212621489936e-05, - "loss": 0.5984, - "step": 8029 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1343236188296043e-05, - "loss": 0.6368, - "step": 8030 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1341259701665912e-05, - "loss": 0.649, - "step": 8031 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.133928316167817e-05, - "loss": 0.6224, - "step": 8032 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.133730656841145e-05, - "loss": 0.5662, - "step": 8033 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1335329921944384e-05, - "loss": 0.701, - "step": 8034 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1333353222355607e-05, - "loss": 0.5909, - "step": 8035 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1331376469723755e-05, - "loss": 0.6855, - "step": 8036 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1329399664127466e-05, - "loss": 0.6041, - "step": 8037 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.132742280564538e-05, - "loss": 0.6551, - "step": 8038 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1325445894356142e-05, - "loss": 0.5937, - "step": 8039 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1323468930338392e-05, - "loss": 0.6654, - "step": 8040 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1321491913670783e-05, - "loss": 0.614, - "step": 8041 - }, - { - "epoch": 1.45, - "grad_norm": 0.0, - "learning_rate": 1.1319514844431967e-05, - "loss": 0.5206, - "step": 8042 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1317537722700582e-05, - "loss": 0.5335, - "step": 8043 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1315560548555294e-05, - "loss": 0.6674, - "step": 8044 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1313583322074752e-05, - "loss": 0.6419, - "step": 8045 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1311606043337617e-05, - "loss": 0.578, - "step": 8046 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1309628712422545e-05, - "loss": 0.5721, - "step": 8047 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1307651329408197e-05, - "loss": 0.6805, - "step": 8048 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1305673894373246e-05, - "loss": 0.7044, - "step": 8049 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1303696407396345e-05, - "loss": 0.8306, - "step": 8050 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1301718868556168e-05, - "loss": 0.5945, - "step": 8051 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1299741277931387e-05, - "loss": 0.687, - "step": 8052 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.129776363560067e-05, - "loss": 0.6239, - "step": 8053 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1295785941642692e-05, - "loss": 0.7105, - "step": 8054 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1293808196136129e-05, - "loss": 0.5562, - "step": 8055 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.129183039915966e-05, - "loss": 0.5749, - "step": 8056 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1289852550791967e-05, - "loss": 0.601, - "step": 8057 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1287874651111726e-05, - "loss": 0.6815, - "step": 8058 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1285896700197625e-05, - "loss": 0.639, - "step": 8059 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1283918698128356e-05, - "loss": 0.6951, - "step": 8060 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1281940644982596e-05, - "loss": 0.5804, - "step": 8061 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1279962540839043e-05, - "loss": 0.6574, - "step": 8062 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1277984385776386e-05, - "loss": 0.6118, - "step": 8063 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1276006179873323e-05, - "loss": 0.7113, - "step": 8064 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1274027923208548e-05, - "loss": 0.6612, - "step": 8065 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.127204961586076e-05, - "loss": 0.6648, - "step": 8066 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1270071257908657e-05, - "loss": 0.6563, - "step": 8067 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1268092849430948e-05, - "loss": 0.6186, - "step": 8068 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1266114390506328e-05, - "loss": 0.5712, - "step": 8069 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1264135881213509e-05, - "loss": 0.6639, - "step": 8070 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1262157321631201e-05, - "loss": 0.5743, - "step": 8071 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1260178711838113e-05, - "loss": 0.6788, - "step": 8072 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1258200051912955e-05, - "loss": 0.5574, - "step": 8073 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1256221341934446e-05, - "loss": 0.6553, - "step": 8074 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1254242581981298e-05, - "loss": 0.5983, - "step": 8075 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1252263772132236e-05, - "loss": 0.6261, - "step": 8076 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1250284912465969e-05, - "loss": 0.6262, - "step": 8077 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.124830600306123e-05, - "loss": 0.6619, - "step": 8078 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1246327043996741e-05, - "loss": 0.6604, - "step": 8079 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1244348035351223e-05, - "loss": 0.7014, - "step": 8080 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.124236897720341e-05, - "loss": 0.6168, - "step": 8081 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1240389869632034e-05, - "loss": 0.7063, - "step": 8082 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1238410712715822e-05, - "loss": 0.5878, - "step": 8083 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.123643150653351e-05, - "loss": 0.7444, - "step": 8084 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1234452251163835e-05, - "loss": 0.634, - "step": 8085 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1232472946685531e-05, - "loss": 0.7022, - "step": 8086 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1230493593177347e-05, - "loss": 0.5591, - "step": 8087 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1228514190718019e-05, - "loss": 0.6299, - "step": 8088 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1226534739386288e-05, - "loss": 0.6083, - "step": 8089 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1224555239260908e-05, - "loss": 0.6504, - "step": 8090 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1222575690420621e-05, - "loss": 0.7913, - "step": 8091 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.122059609294418e-05, - "loss": 0.5666, - "step": 8092 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1218616446910336e-05, - "loss": 0.5757, - "step": 8093 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1216636752397838e-05, - "loss": 0.541, - "step": 8094 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.121465700948545e-05, - "loss": 0.5282, - "step": 8095 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1212677218251926e-05, - "loss": 0.6245, - "step": 8096 - }, - { - "epoch": 1.46, - "grad_norm": 0.0, - "learning_rate": 1.1210697378776022e-05, - "loss": 0.6244, - "step": 8097 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1208717491136503e-05, - "loss": 0.6435, - "step": 8098 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1206737555412131e-05, - "loss": 0.7032, - "step": 8099 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1204757571681673e-05, - "loss": 0.5161, - "step": 8100 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1202777540023893e-05, - "loss": 0.6355, - "step": 8101 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1200797460517563e-05, - "loss": 0.6255, - "step": 8102 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1198817333241454e-05, - "loss": 0.5827, - "step": 8103 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1196837158274336e-05, - "loss": 0.6332, - "step": 8104 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1194856935694986e-05, - "loss": 0.6433, - "step": 8105 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1192876665582185e-05, - "loss": 0.6373, - "step": 8106 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1190896348014702e-05, - "loss": 0.6088, - "step": 8107 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.118891598307132e-05, - "loss": 0.5837, - "step": 8108 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1186935570830826e-05, - "loss": 0.686, - "step": 8109 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1184955111372002e-05, - "loss": 0.6517, - "step": 8110 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1182974604773633e-05, - "loss": 0.7018, - "step": 8111 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1180994051114509e-05, - "loss": 0.6299, - "step": 8112 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1179013450473417e-05, - "loss": 0.671, - "step": 8113 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1177032802929154e-05, - "loss": 0.6883, - "step": 8114 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1175052108560504e-05, - "loss": 0.7628, - "step": 8115 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.117307136744627e-05, - "loss": 0.5886, - "step": 8116 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1171090579665249e-05, - "loss": 0.5921, - "step": 8117 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1169109745296236e-05, - "loss": 0.6421, - "step": 8118 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1167128864418038e-05, - "loss": 0.6446, - "step": 8119 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1165147937109453e-05, - "loss": 0.714, - "step": 8120 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1163166963449286e-05, - "loss": 0.6472, - "step": 8121 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1161185943516346e-05, - "loss": 0.6635, - "step": 8122 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1159204877389438e-05, - "loss": 0.703, - "step": 8123 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1157223765147374e-05, - "loss": 0.5646, - "step": 8124 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.115524260686897e-05, - "loss": 0.6298, - "step": 8125 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1153261402633031e-05, - "loss": 0.6232, - "step": 8126 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1151280152518382e-05, - "loss": 0.6017, - "step": 8127 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1149298856603835e-05, - "loss": 0.6297, - "step": 8128 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.114731751496821e-05, - "loss": 0.6189, - "step": 8129 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1145336127690329e-05, - "loss": 0.616, - "step": 8130 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1143354694849016e-05, - "loss": 0.6868, - "step": 8131 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1141373216523093e-05, - "loss": 0.7329, - "step": 8132 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1139391692791387e-05, - "loss": 0.6288, - "step": 8133 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.113741012373273e-05, - "loss": 0.555, - "step": 8134 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1135428509425947e-05, - "loss": 0.6362, - "step": 8135 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1133446849949878e-05, - "loss": 0.6033, - "step": 8136 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1131465145383347e-05, - "loss": 0.6027, - "step": 8137 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1129483395805194e-05, - "loss": 0.8285, - "step": 8138 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.112750160129426e-05, - "loss": 0.6368, - "step": 8139 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1125519761929378e-05, - "loss": 0.6415, - "step": 8140 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1123537877789392e-05, - "loss": 0.6965, - "step": 8141 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1121555948953146e-05, - "loss": 0.6369, - "step": 8142 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.111957397549948e-05, - "loss": 0.5413, - "step": 8143 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1117591957507247e-05, - "loss": 0.6366, - "step": 8144 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1115609895055291e-05, - "loss": 0.6736, - "step": 8145 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.111362778822246e-05, - "loss": 0.6058, - "step": 8146 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1111645637087609e-05, - "loss": 0.6406, - "step": 8147 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.110966344172959e-05, - "loss": 0.6907, - "step": 8148 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1107681202227258e-05, - "loss": 0.6498, - "step": 8149 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1105698918659472e-05, - "loss": 0.6539, - "step": 8150 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1103716591105087e-05, - "loss": 0.6021, - "step": 8151 - }, - { - "epoch": 1.47, - "grad_norm": 0.0, - "learning_rate": 1.1101734219642965e-05, - "loss": 0.7121, - "step": 8152 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1099751804351971e-05, - "loss": 0.684, - "step": 8153 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1097769345310963e-05, - "loss": 0.5914, - "step": 8154 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1095786842598813e-05, - "loss": 0.6611, - "step": 8155 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1093804296294381e-05, - "loss": 0.662, - "step": 8156 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1091821706476544e-05, - "loss": 0.587, - "step": 8157 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1089839073224167e-05, - "loss": 0.634, - "step": 8158 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1087856396616126e-05, - "loss": 0.7195, - "step": 8159 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1085873676731294e-05, - "loss": 0.6426, - "step": 8160 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1083890913648548e-05, - "loss": 0.5787, - "step": 8161 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.108190810744676e-05, - "loss": 0.547, - "step": 8162 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.107992525820482e-05, - "loss": 0.5813, - "step": 8163 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1077942366001598e-05, - "loss": 0.6205, - "step": 8164 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1075959430915984e-05, - "loss": 0.6996, - "step": 8165 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1073976453026865e-05, - "loss": 0.7982, - "step": 8166 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1071993432413116e-05, - "loss": 0.5949, - "step": 8167 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1070010369153635e-05, - "loss": 0.6221, - "step": 8168 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1068027263327307e-05, - "loss": 0.5859, - "step": 8169 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1066044115013028e-05, - "loss": 0.7666, - "step": 8170 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1064060924289686e-05, - "loss": 0.5615, - "step": 8171 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1062077691236176e-05, - "loss": 0.5797, - "step": 8172 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1060094415931397e-05, - "loss": 0.5631, - "step": 8173 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.105811109845425e-05, - "loss": 0.5779, - "step": 8174 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1056127738883624e-05, - "loss": 0.706, - "step": 8175 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1054144337298432e-05, - "loss": 0.5833, - "step": 8176 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.105216089377757e-05, - "loss": 0.5525, - "step": 8177 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1050177408399948e-05, - "loss": 0.6649, - "step": 8178 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1048193881244467e-05, - "loss": 0.6902, - "step": 8179 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.104621031239004e-05, - "loss": 0.7179, - "step": 8180 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1044226701915571e-05, - "loss": 0.4949, - "step": 8181 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1042243049899978e-05, - "loss": 0.6061, - "step": 8182 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.104025935642217e-05, - "loss": 0.6299, - "step": 8183 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1038275621561063e-05, - "loss": 0.6522, - "step": 8184 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1036291845395575e-05, - "loss": 0.5946, - "step": 8185 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.103430802800462e-05, - "loss": 0.6145, - "step": 8186 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1032324169467118e-05, - "loss": 0.6851, - "step": 8187 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1030340269861994e-05, - "loss": 0.5912, - "step": 8188 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.102835632926817e-05, - "loss": 0.7202, - "step": 8189 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1026372347764569e-05, - "loss": 0.6975, - "step": 8190 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1024388325430115e-05, - "loss": 0.5882, - "step": 8191 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1022404262343741e-05, - "loss": 0.69, - "step": 8192 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1020420158584377e-05, - "loss": 0.5497, - "step": 8193 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1018436014230946e-05, - "loss": 0.6663, - "step": 8194 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1016451829362386e-05, - "loss": 0.6888, - "step": 8195 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1014467604057635e-05, - "loss": 0.733, - "step": 8196 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.101248333839562e-05, - "loss": 0.6539, - "step": 8197 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1010499032455288e-05, - "loss": 0.5525, - "step": 8198 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.100851468631557e-05, - "loss": 0.6079, - "step": 8199 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1006530300055412e-05, - "loss": 0.6159, - "step": 8200 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1004545873753754e-05, - "loss": 0.5584, - "step": 8201 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1002561407489538e-05, - "loss": 0.7326, - "step": 8202 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.1000576901341714e-05, - "loss": 0.5487, - "step": 8203 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.0998592355389229e-05, - "loss": 0.701, - "step": 8204 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.0996607769711023e-05, - "loss": 0.6126, - "step": 8205 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.0994623144386059e-05, - "loss": 0.6647, - "step": 8206 - }, - { - "epoch": 1.48, - "grad_norm": 0.0, - "learning_rate": 1.0992638479493279e-05, - "loss": 0.584, - "step": 8207 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0990653775111642e-05, - "loss": 0.6499, - "step": 8208 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0988669031320097e-05, - "loss": 0.7334, - "step": 8209 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0986684248197605e-05, - "loss": 0.6236, - "step": 8210 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0984699425823126e-05, - "loss": 0.5746, - "step": 8211 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0982714564275618e-05, - "loss": 0.6518, - "step": 8212 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0980729663634038e-05, - "loss": 0.638, - "step": 8213 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0978744723977353e-05, - "loss": 0.65, - "step": 8214 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0976759745384524e-05, - "loss": 0.7356, - "step": 8215 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0974774727934524e-05, - "loss": 0.6545, - "step": 8216 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0972789671706312e-05, - "loss": 0.6801, - "step": 8217 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.097080457677886e-05, - "loss": 0.6048, - "step": 8218 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.096881944323114e-05, - "loss": 0.5836, - "step": 8219 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0966834271142124e-05, - "loss": 0.6204, - "step": 8220 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.096484906059078e-05, - "loss": 0.5649, - "step": 8221 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0962863811656093e-05, - "loss": 0.6096, - "step": 8222 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.096087852441703e-05, - "loss": 0.6578, - "step": 8223 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0958893198952576e-05, - "loss": 0.6823, - "step": 8224 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0956907835341706e-05, - "loss": 0.5593, - "step": 8225 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0954922433663404e-05, - "loss": 0.5596, - "step": 8226 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0952936993996652e-05, - "loss": 0.5814, - "step": 8227 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0950951516420435e-05, - "loss": 0.6765, - "step": 8228 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0948966001013732e-05, - "loss": 0.6201, - "step": 8229 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0946980447855543e-05, - "loss": 0.5871, - "step": 8230 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0944994857024846e-05, - "loss": 0.6914, - "step": 8231 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0943009228600635e-05, - "loss": 0.6735, - "step": 8232 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0941023562661902e-05, - "loss": 0.6884, - "step": 8233 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0939037859287638e-05, - "loss": 0.6892, - "step": 8234 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0937052118556841e-05, - "loss": 0.6005, - "step": 8235 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0935066340548506e-05, - "loss": 0.6098, - "step": 8236 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0933080525341625e-05, - "loss": 0.6911, - "step": 8237 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.093109467301521e-05, - "loss": 0.6299, - "step": 8238 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.092910878364825e-05, - "loss": 0.5997, - "step": 8239 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.092712285731975e-05, - "loss": 0.6112, - "step": 8240 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0925136894108714e-05, - "loss": 0.6912, - "step": 8241 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.092315089409415e-05, - "loss": 0.7122, - "step": 8242 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0921164857355061e-05, - "loss": 0.7344, - "step": 8243 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0919178783970457e-05, - "loss": 0.7063, - "step": 8244 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0917192674019345e-05, - "loss": 0.6514, - "step": 8245 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0915206527580737e-05, - "loss": 0.6427, - "step": 8246 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0913220344733647e-05, - "loss": 0.7499, - "step": 8247 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0911234125557084e-05, - "loss": 0.6599, - "step": 8248 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.090924787013007e-05, - "loss": 0.5662, - "step": 8249 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0907261578531619e-05, - "loss": 0.6244, - "step": 8250 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0905275250840744e-05, - "loss": 0.6198, - "step": 8251 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.090328888713647e-05, - "loss": 0.6442, - "step": 8252 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0901302487497818e-05, - "loss": 0.7068, - "step": 8253 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0899316052003808e-05, - "loss": 0.5822, - "step": 8254 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0897329580733467e-05, - "loss": 0.6059, - "step": 8255 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0895343073765812e-05, - "loss": 0.6287, - "step": 8256 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0893356531179883e-05, - "loss": 0.6358, - "step": 8257 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0891369953054698e-05, - "loss": 0.7025, - "step": 8258 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0889383339469287e-05, - "loss": 0.7239, - "step": 8259 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0887396690502686e-05, - "loss": 0.5491, - "step": 8260 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0885410006233923e-05, - "loss": 0.5919, - "step": 8261 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0883423286742035e-05, - "loss": 0.6678, - "step": 8262 - }, - { - "epoch": 1.49, - "grad_norm": 0.0, - "learning_rate": 1.0881436532106054e-05, - "loss": 0.6263, - "step": 8263 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0879449742405015e-05, - "loss": 0.6123, - "step": 8264 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0877462917717965e-05, - "loss": 0.7124, - "step": 8265 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0875476058123933e-05, - "loss": 0.7149, - "step": 8266 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0873489163701963e-05, - "loss": 0.6489, - "step": 8267 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0871502234531097e-05, - "loss": 0.7282, - "step": 8268 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0869515270690386e-05, - "loss": 0.727, - "step": 8269 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.086752827225886e-05, - "loss": 0.6656, - "step": 8270 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0865541239315576e-05, - "loss": 0.6004, - "step": 8271 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0863554171939578e-05, - "loss": 0.6601, - "step": 8272 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0861567070209918e-05, - "loss": 0.6136, - "step": 8273 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0859579934205642e-05, - "loss": 0.6493, - "step": 8274 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0857592764005803e-05, - "loss": 0.6867, - "step": 8275 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0855605559689458e-05, - "loss": 0.644, - "step": 8276 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0853618321335655e-05, - "loss": 0.6897, - "step": 8277 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0851631049023453e-05, - "loss": 0.7058, - "step": 8278 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.084964374283191e-05, - "loss": 0.6235, - "step": 8279 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0847656402840083e-05, - "loss": 0.7392, - "step": 8280 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0845669029127032e-05, - "loss": 0.5888, - "step": 8281 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.084368162177182e-05, - "loss": 0.633, - "step": 8282 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0841694180853503e-05, - "loss": 0.5354, - "step": 8283 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0839706706451157e-05, - "loss": 0.6413, - "step": 8284 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0837719198643834e-05, - "loss": 0.6595, - "step": 8285 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0835731657510606e-05, - "loss": 0.6196, - "step": 8286 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0833744083130544e-05, - "loss": 0.7083, - "step": 8287 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0831756475582713e-05, - "loss": 0.6121, - "step": 8288 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0829768834946185e-05, - "loss": 0.693, - "step": 8289 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.082778116130003e-05, - "loss": 0.6041, - "step": 8290 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0825793454723325e-05, - "loss": 0.5636, - "step": 8291 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.082380571529514e-05, - "loss": 0.5757, - "step": 8292 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0821817943094552e-05, - "loss": 0.5915, - "step": 8293 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0819830138200638e-05, - "loss": 0.61, - "step": 8294 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0817842300692478e-05, - "loss": 0.551, - "step": 8295 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.081585443064915e-05, - "loss": 0.679, - "step": 8296 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0813866528149732e-05, - "loss": 0.5944, - "step": 8297 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0811878593273313e-05, - "loss": 0.5834, - "step": 8298 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0809890626098971e-05, - "loss": 0.7109, - "step": 8299 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0807902626705796e-05, - "loss": 0.5887, - "step": 8300 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0805914595172867e-05, - "loss": 0.5284, - "step": 8301 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0803926531579271e-05, - "loss": 0.6007, - "step": 8302 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0801938436004109e-05, - "loss": 0.5646, - "step": 8303 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0799950308526457e-05, - "loss": 0.7245, - "step": 8304 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0797962149225411e-05, - "loss": 0.6774, - "step": 8305 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0795973958180066e-05, - "loss": 0.6275, - "step": 8306 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0793985735469512e-05, - "loss": 0.6208, - "step": 8307 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0791997481172847e-05, - "loss": 0.5682, - "step": 8308 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0790009195369162e-05, - "loss": 0.5869, - "step": 8309 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0788020878137561e-05, - "loss": 0.6372, - "step": 8310 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.078603252955714e-05, - "loss": 0.6149, - "step": 8311 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0784044149706996e-05, - "loss": 0.7418, - "step": 8312 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.078205573866623e-05, - "loss": 0.5917, - "step": 8313 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0780067296513954e-05, - "loss": 0.6231, - "step": 8314 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0778078823329261e-05, - "loss": 0.6256, - "step": 8315 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0776090319191259e-05, - "loss": 0.6283, - "step": 8316 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0774101784179054e-05, - "loss": 0.5318, - "step": 8317 - }, - { - "epoch": 1.5, - "grad_norm": 0.0, - "learning_rate": 1.0772113218371755e-05, - "loss": 0.5888, - "step": 8318 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.077012462184847e-05, - "loss": 0.5496, - "step": 8319 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0768135994688308e-05, - "loss": 0.5805, - "step": 8320 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0766147336970378e-05, - "loss": 0.5766, - "step": 8321 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.07641586487738e-05, - "loss": 0.5606, - "step": 8322 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0762169930177678e-05, - "loss": 0.593, - "step": 8323 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.076018118126113e-05, - "loss": 0.7113, - "step": 8324 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0758192402103274e-05, - "loss": 0.6052, - "step": 8325 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0756203592783223e-05, - "loss": 0.5954, - "step": 8326 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0754214753380098e-05, - "loss": 0.5925, - "step": 8327 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.075222588397302e-05, - "loss": 0.6529, - "step": 8328 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0750236984641108e-05, - "loss": 0.7212, - "step": 8329 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0748248055463483e-05, - "loss": 0.5864, - "step": 8330 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0746259096519267e-05, - "loss": 0.5273, - "step": 8331 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0744270107887587e-05, - "loss": 0.6868, - "step": 8332 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0742281089647568e-05, - "loss": 0.539, - "step": 8333 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0740292041878333e-05, - "loss": 0.69, - "step": 8334 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0738302964659012e-05, - "loss": 0.6339, - "step": 8335 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.073631385806874e-05, - "loss": 0.6167, - "step": 8336 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0734324722186636e-05, - "loss": 0.7406, - "step": 8337 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0732335557091838e-05, - "loss": 0.6237, - "step": 8338 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0730346362863477e-05, - "loss": 0.626, - "step": 8339 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0728357139580685e-05, - "loss": 0.651, - "step": 8340 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0726367887322602e-05, - "loss": 0.6148, - "step": 8341 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.072437860616836e-05, - "loss": 0.5342, - "step": 8342 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.072238929619709e-05, - "loss": 0.6907, - "step": 8343 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0720399957487943e-05, - "loss": 0.6613, - "step": 8344 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0718410590120048e-05, - "loss": 0.6353, - "step": 8345 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.071642119417255e-05, - "loss": 0.6918, - "step": 8346 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0714431769724589e-05, - "loss": 0.57, - "step": 8347 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.071244231685531e-05, - "loss": 0.6237, - "step": 8348 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0710452835643855e-05, - "loss": 0.6951, - "step": 8349 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0708463326169367e-05, - "loss": 0.6641, - "step": 8350 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0706473788510993e-05, - "loss": 0.656, - "step": 8351 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0704484222747886e-05, - "loss": 0.5686, - "step": 8352 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0702494628959185e-05, - "loss": 0.6446, - "step": 8353 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.070050500722405e-05, - "loss": 0.5691, - "step": 8354 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0698515357621622e-05, - "loss": 0.6397, - "step": 8355 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0696525680231055e-05, - "loss": 0.6128, - "step": 8356 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0694535975131507e-05, - "loss": 0.6723, - "step": 8357 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0692546242402126e-05, - "loss": 0.5894, - "step": 8358 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0690556482122067e-05, - "loss": 0.6137, - "step": 8359 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0688566694370495e-05, - "loss": 0.7203, - "step": 8360 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0686576879226555e-05, - "loss": 0.7592, - "step": 8361 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.068458703676941e-05, - "loss": 0.695, - "step": 8362 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0682597167078223e-05, - "loss": 0.6466, - "step": 8363 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0680607270232148e-05, - "loss": 0.6523, - "step": 8364 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0678617346310352e-05, - "loss": 0.5617, - "step": 8365 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0676627395391994e-05, - "loss": 0.6905, - "step": 8366 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0674637417556242e-05, - "loss": 0.5947, - "step": 8367 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0672647412882255e-05, - "loss": 0.7093, - "step": 8368 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0670657381449202e-05, - "loss": 0.5383, - "step": 8369 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.066866732333625e-05, - "loss": 0.6683, - "step": 8370 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0666677238622567e-05, - "loss": 0.6244, - "step": 8371 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.066468712738732e-05, - "loss": 0.635, - "step": 8372 - }, - { - "epoch": 1.51, - "grad_norm": 0.0, - "learning_rate": 1.0662696989709682e-05, - "loss": 0.7343, - "step": 8373 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.066070682566882e-05, - "loss": 0.6833, - "step": 8374 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.065871663534391e-05, - "loss": 0.5754, - "step": 8375 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0656726418814125e-05, - "loss": 0.4745, - "step": 8376 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0654736176158638e-05, - "loss": 0.584, - "step": 8377 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0652745907456624e-05, - "loss": 0.7142, - "step": 8378 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0650755612787258e-05, - "loss": 0.607, - "step": 8379 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0648765292229719e-05, - "loss": 0.7093, - "step": 8380 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0646774945863185e-05, - "loss": 0.7611, - "step": 8381 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.064478457376684e-05, - "loss": 0.533, - "step": 8382 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0642794176019854e-05, - "loss": 0.5762, - "step": 8383 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0640803752701419e-05, - "loss": 0.6549, - "step": 8384 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0638813303890711e-05, - "loss": 0.5863, - "step": 8385 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0636822829666915e-05, - "loss": 0.6776, - "step": 8386 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0634832330109217e-05, - "loss": 0.6476, - "step": 8387 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0632841805296803e-05, - "loss": 0.566, - "step": 8388 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0630851255308854e-05, - "loss": 0.58, - "step": 8389 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0628860680224563e-05, - "loss": 0.6531, - "step": 8390 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0626870080123118e-05, - "loss": 0.6165, - "step": 8391 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0624879455083706e-05, - "loss": 0.658, - "step": 8392 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0622888805185518e-05, - "loss": 0.5922, - "step": 8393 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0620898130507748e-05, - "loss": 0.5608, - "step": 8394 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0618907431129589e-05, - "loss": 0.565, - "step": 8395 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0616916707130227e-05, - "loss": 0.6736, - "step": 8396 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0614925958588864e-05, - "loss": 0.6847, - "step": 8397 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0612935185584694e-05, - "loss": 0.6245, - "step": 8398 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0610944388196908e-05, - "loss": 0.5688, - "step": 8399 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0608953566504711e-05, - "loss": 0.8756, - "step": 8400 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0606962720587301e-05, - "loss": 0.6791, - "step": 8401 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0604971850523868e-05, - "loss": 0.6763, - "step": 8402 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0602980956393622e-05, - "loss": 0.6739, - "step": 8403 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.060099003827576e-05, - "loss": 0.5813, - "step": 8404 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0598999096249486e-05, - "loss": 0.6257, - "step": 8405 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0597008130393999e-05, - "loss": 0.6032, - "step": 8406 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0595017140788508e-05, - "loss": 0.5039, - "step": 8407 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0593026127512214e-05, - "loss": 0.5911, - "step": 8408 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0591035090644328e-05, - "loss": 0.6542, - "step": 8409 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.058904403026405e-05, - "loss": 0.6296, - "step": 8410 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0587052946450593e-05, - "loss": 0.6367, - "step": 8411 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0585061839283165e-05, - "loss": 0.609, - "step": 8412 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0583070708840972e-05, - "loss": 0.6001, - "step": 8413 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0581079555203231e-05, - "loss": 0.5693, - "step": 8414 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0579088378449148e-05, - "loss": 0.7164, - "step": 8415 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0577097178657935e-05, - "loss": 0.7017, - "step": 8416 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.057510595590881e-05, - "loss": 0.5901, - "step": 8417 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0573114710280984e-05, - "loss": 0.6429, - "step": 8418 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0571123441853674e-05, - "loss": 0.6279, - "step": 8419 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0569132150706099e-05, - "loss": 0.5821, - "step": 8420 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0567140836917465e-05, - "loss": 0.6696, - "step": 8421 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0565149500567e-05, - "loss": 0.5778, - "step": 8422 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0563158141733917e-05, - "loss": 0.6607, - "step": 8423 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0561166760497442e-05, - "loss": 0.5319, - "step": 8424 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0559175356936793e-05, - "loss": 0.6457, - "step": 8425 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0557183931131186e-05, - "loss": 0.5847, - "step": 8426 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0555192483159848e-05, - "loss": 0.6721, - "step": 8427 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0553201013102008e-05, - "loss": 0.5982, - "step": 8428 - }, - { - "epoch": 1.52, - "grad_norm": 0.0, - "learning_rate": 1.0551209521036877e-05, - "loss": 0.6356, - "step": 8429 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.054921800704369e-05, - "loss": 0.6849, - "step": 8430 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.054722647120167e-05, - "loss": 0.6716, - "step": 8431 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0545234913590043e-05, - "loss": 0.6414, - "step": 8432 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0543243334288038e-05, - "loss": 0.6476, - "step": 8433 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0541251733374883e-05, - "loss": 0.6406, - "step": 8434 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0539260110929805e-05, - "loss": 0.8401, - "step": 8435 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0537268467032036e-05, - "loss": 0.5983, - "step": 8436 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0535276801760805e-05, - "loss": 0.6718, - "step": 8437 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0533285115195351e-05, - "loss": 0.6562, - "step": 8438 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.05312934074149e-05, - "loss": 0.6166, - "step": 8439 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0529301678498685e-05, - "loss": 0.6226, - "step": 8440 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0527309928525944e-05, - "loss": 0.6355, - "step": 8441 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.052531815757591e-05, - "loss": 0.5946, - "step": 8442 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.052332636572782e-05, - "loss": 0.6568, - "step": 8443 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0521334553060912e-05, - "loss": 0.6923, - "step": 8444 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0519342719654418e-05, - "loss": 0.6184, - "step": 8445 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0517350865587584e-05, - "loss": 0.5857, - "step": 8446 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0515358990939648e-05, - "loss": 0.6479, - "step": 8447 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0513367095789846e-05, - "loss": 0.6211, - "step": 8448 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0511375180217422e-05, - "loss": 0.611, - "step": 8449 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0509383244301619e-05, - "loss": 0.7008, - "step": 8450 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0507391288121675e-05, - "loss": 0.5878, - "step": 8451 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.050539931175684e-05, - "loss": 0.687, - "step": 8452 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0503407315286352e-05, - "loss": 0.6267, - "step": 8453 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.050141529878946e-05, - "loss": 0.6349, - "step": 8454 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0499423262345407e-05, - "loss": 0.7383, - "step": 8455 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.049743120603344e-05, - "loss": 0.6119, - "step": 8456 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0495439129932807e-05, - "loss": 0.659, - "step": 8457 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0493447034122762e-05, - "loss": 0.564, - "step": 8458 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0491454918682545e-05, - "loss": 0.5524, - "step": 8459 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0489462783691408e-05, - "loss": 0.6466, - "step": 8460 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0487470629228607e-05, - "loss": 0.5789, - "step": 8461 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0485478455373386e-05, - "loss": 0.6136, - "step": 8462 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0483486262205002e-05, - "loss": 0.6276, - "step": 8463 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0481494049802703e-05, - "loss": 0.734, - "step": 8464 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.047950181824575e-05, - "loss": 0.6674, - "step": 8465 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0477509567613394e-05, - "loss": 0.5665, - "step": 8466 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0475517297984884e-05, - "loss": 0.5657, - "step": 8467 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0473525009439485e-05, - "loss": 0.7442, - "step": 8468 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0471532702056449e-05, - "loss": 0.5784, - "step": 8469 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0469540375915034e-05, - "loss": 0.5524, - "step": 8470 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.04675480310945e-05, - "loss": 0.6328, - "step": 8471 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.04655556676741e-05, - "loss": 0.5282, - "step": 8472 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0463563285733107e-05, - "loss": 0.6627, - "step": 8473 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0461570885350767e-05, - "loss": 0.608, - "step": 8474 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0459578466606345e-05, - "loss": 0.6879, - "step": 8475 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0457586029579105e-05, - "loss": 0.5793, - "step": 8476 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0455593574348311e-05, - "loss": 0.6437, - "step": 8477 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0453601100993221e-05, - "loss": 0.6543, - "step": 8478 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0451608609593105e-05, - "loss": 0.5933, - "step": 8479 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0449616100227223e-05, - "loss": 0.7311, - "step": 8480 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0447623572974844e-05, - "loss": 0.5684, - "step": 8481 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0445631027915231e-05, - "loss": 0.5922, - "step": 8482 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.044363846512765e-05, - "loss": 0.6332, - "step": 8483 - }, - { - "epoch": 1.53, - "grad_norm": 0.0, - "learning_rate": 1.0441645884691373e-05, - "loss": 0.5848, - "step": 8484 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.043965328668567e-05, - "loss": 0.641, - "step": 8485 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0437660671189799e-05, - "loss": 0.68, - "step": 8486 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0435668038283043e-05, - "loss": 0.6345, - "step": 8487 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0433675388044665e-05, - "loss": 0.5361, - "step": 8488 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0431682720553935e-05, - "loss": 0.6906, - "step": 8489 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0429690035890128e-05, - "loss": 0.534, - "step": 8490 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0427697334132512e-05, - "loss": 0.602, - "step": 8491 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0425704615360368e-05, - "loss": 0.6785, - "step": 8492 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0423711879652965e-05, - "loss": 0.7189, - "step": 8493 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0421719127089573e-05, - "loss": 0.5902, - "step": 8494 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0419726357749475e-05, - "loss": 0.6606, - "step": 8495 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0417733571711944e-05, - "loss": 0.6228, - "step": 8496 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0415740769056255e-05, - "loss": 0.7308, - "step": 8497 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0413747949861685e-05, - "loss": 0.5839, - "step": 8498 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0411755114207513e-05, - "loss": 0.5922, - "step": 8499 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.040976226217302e-05, - "loss": 0.6404, - "step": 8500 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0407769393837478e-05, - "loss": 0.5274, - "step": 8501 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0405776509280173e-05, - "loss": 0.5408, - "step": 8502 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0403783608580386e-05, - "loss": 0.5492, - "step": 8503 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0401790691817392e-05, - "loss": 0.6174, - "step": 8504 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0399797759070476e-05, - "loss": 0.6524, - "step": 8505 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0397804810418921e-05, - "loss": 0.6826, - "step": 8506 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.039581184594201e-05, - "loss": 0.6595, - "step": 8507 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0393818865719026e-05, - "loss": 0.7687, - "step": 8508 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0391825869829251e-05, - "loss": 0.5474, - "step": 8509 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.038983285835197e-05, - "loss": 0.6111, - "step": 8510 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0387839831366476e-05, - "loss": 0.6777, - "step": 8511 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0385846788952046e-05, - "loss": 0.6836, - "step": 8512 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0383853731187965e-05, - "loss": 0.6167, - "step": 8513 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0381860658153531e-05, - "loss": 0.6532, - "step": 8514 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0379867569928024e-05, - "loss": 0.6623, - "step": 8515 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0377874466590733e-05, - "loss": 0.5925, - "step": 8516 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0375881348220952e-05, - "loss": 0.6341, - "step": 8517 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0373888214897964e-05, - "loss": 0.7337, - "step": 8518 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0371895066701066e-05, - "loss": 0.6528, - "step": 8519 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0369901903709541e-05, - "loss": 0.5721, - "step": 8520 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0367908726002684e-05, - "loss": 0.6636, - "step": 8521 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0365915533659791e-05, - "loss": 0.7258, - "step": 8522 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0363922326760152e-05, - "loss": 0.6831, - "step": 8523 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0361929105383054e-05, - "loss": 0.6935, - "step": 8524 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.03599358696078e-05, - "loss": 0.71, - "step": 8525 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0357942619513678e-05, - "loss": 0.7498, - "step": 8526 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0355949355179988e-05, - "loss": 0.5952, - "step": 8527 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0353956076686025e-05, - "loss": 0.6254, - "step": 8528 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0351962784111076e-05, - "loss": 0.6597, - "step": 8529 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0349969477534453e-05, - "loss": 0.5972, - "step": 8530 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.034797615703544e-05, - "loss": 0.5861, - "step": 8531 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0345982822693337e-05, - "loss": 0.6943, - "step": 8532 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0343989474587449e-05, - "loss": 0.5996, - "step": 8533 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0341996112797068e-05, - "loss": 0.6002, - "step": 8534 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0340002737401496e-05, - "loss": 0.5725, - "step": 8535 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0338009348480033e-05, - "loss": 0.5564, - "step": 8536 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0336015946111977e-05, - "loss": 0.6302, - "step": 8537 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0334022530376636e-05, - "loss": 0.6942, - "step": 8538 - }, - { - "epoch": 1.54, - "grad_norm": 0.0, - "learning_rate": 1.0332029101353304e-05, - "loss": 0.6426, - "step": 8539 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0330035659121283e-05, - "loss": 0.6951, - "step": 8540 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0328042203759883e-05, - "loss": 0.7145, - "step": 8541 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0326048735348398e-05, - "loss": 0.5434, - "step": 8542 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0324055253966136e-05, - "loss": 0.5661, - "step": 8543 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0322061759692403e-05, - "loss": 0.747, - "step": 8544 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0320068252606501e-05, - "loss": 0.5893, - "step": 8545 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0318074732787734e-05, - "loss": 0.5349, - "step": 8546 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0316081200315414e-05, - "loss": 0.6418, - "step": 8547 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0314087655268835e-05, - "loss": 0.63, - "step": 8548 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0312094097727317e-05, - "loss": 0.6122, - "step": 8549 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.031010052777016e-05, - "loss": 0.61, - "step": 8550 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0308106945476668e-05, - "loss": 0.538, - "step": 8551 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0306113350926158e-05, - "loss": 0.6215, - "step": 8552 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0304119744197935e-05, - "loss": 0.6188, - "step": 8553 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0302126125371309e-05, - "loss": 0.6721, - "step": 8554 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0300132494525586e-05, - "loss": 0.5931, - "step": 8555 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0298138851740077e-05, - "loss": 0.5576, - "step": 8556 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0296145197094093e-05, - "loss": 0.5609, - "step": 8557 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.029415153066695e-05, - "loss": 0.5665, - "step": 8558 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.029215785253795e-05, - "loss": 0.6421, - "step": 8559 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0290164162786415e-05, - "loss": 0.6814, - "step": 8560 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.028817046149165e-05, - "loss": 0.6196, - "step": 8561 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.028617674873297e-05, - "loss": 0.6574, - "step": 8562 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0284183024589689e-05, - "loss": 0.6786, - "step": 8563 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0282189289141122e-05, - "loss": 0.6882, - "step": 8564 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.028019554246658e-05, - "loss": 0.5935, - "step": 8565 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0278201784645381e-05, - "loss": 0.6254, - "step": 8566 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0276208015756834e-05, - "loss": 0.623, - "step": 8567 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0274214235880267e-05, - "loss": 0.6674, - "step": 8568 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0272220445094983e-05, - "loss": 0.6039, - "step": 8569 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0270226643480301e-05, - "loss": 0.5818, - "step": 8570 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0268232831115543e-05, - "loss": 0.5468, - "step": 8571 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0266239008080024e-05, - "loss": 0.6137, - "step": 8572 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.026424517445306e-05, - "loss": 0.5906, - "step": 8573 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.026225133031397e-05, - "loss": 0.7448, - "step": 8574 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0260257475742075e-05, - "loss": 0.5863, - "step": 8575 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.025826361081669e-05, - "loss": 0.5658, - "step": 8576 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0256269735617138e-05, - "loss": 0.6371, - "step": 8577 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0254275850222732e-05, - "loss": 0.56, - "step": 8578 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0252281954712803e-05, - "loss": 0.5838, - "step": 8579 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.025028804916666e-05, - "loss": 0.6102, - "step": 8580 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0248294133663634e-05, - "loss": 0.6464, - "step": 8581 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0246300208283042e-05, - "loss": 0.6863, - "step": 8582 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0244306273104203e-05, - "loss": 0.6699, - "step": 8583 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0242312328206446e-05, - "loss": 0.5838, - "step": 8584 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0240318373669085e-05, - "loss": 0.5508, - "step": 8585 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0238324409571447e-05, - "loss": 0.6544, - "step": 8586 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.023633043599286e-05, - "loss": 0.6968, - "step": 8587 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.023433645301264e-05, - "loss": 0.7273, - "step": 8588 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0232342460710116e-05, - "loss": 0.5919, - "step": 8589 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0230348459164611e-05, - "loss": 0.7346, - "step": 8590 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.022835444845545e-05, - "loss": 0.7018, - "step": 8591 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.022636042866196e-05, - "loss": 0.643, - "step": 8592 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0224366399863462e-05, - "loss": 0.6103, - "step": 8593 - }, - { - "epoch": 1.55, - "grad_norm": 0.0, - "learning_rate": 1.0222372362139283e-05, - "loss": 0.5341, - "step": 8594 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0220378315568751e-05, - "loss": 0.581, - "step": 8595 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0218384260231193e-05, - "loss": 0.6871, - "step": 8596 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0216390196205931e-05, - "loss": 0.6788, - "step": 8597 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0214396123572302e-05, - "loss": 0.5237, - "step": 8598 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0212402042409626e-05, - "loss": 0.6398, - "step": 8599 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0210407952797232e-05, - "loss": 0.4788, - "step": 8600 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.020841385481445e-05, - "loss": 0.6649, - "step": 8601 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0206419748540608e-05, - "loss": 0.6826, - "step": 8602 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0204425634055036e-05, - "loss": 0.5619, - "step": 8603 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.020243151143706e-05, - "loss": 0.5801, - "step": 8604 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.020043738076601e-05, - "loss": 0.5407, - "step": 8605 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0198443242121224e-05, - "loss": 0.6065, - "step": 8606 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0196449095582017e-05, - "loss": 0.6119, - "step": 8607 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0194454941227734e-05, - "loss": 0.5955, - "step": 8608 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0192460779137703e-05, - "loss": 0.68, - "step": 8609 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0190466609391246e-05, - "loss": 0.725, - "step": 8610 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0188472432067704e-05, - "loss": 0.6867, - "step": 8611 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0186478247246405e-05, - "loss": 0.7767, - "step": 8612 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0184484055006686e-05, - "loss": 0.5748, - "step": 8613 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.018248985542787e-05, - "loss": 0.6315, - "step": 8614 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0180495648589297e-05, - "loss": 0.6253, - "step": 8615 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0178501434570296e-05, - "loss": 0.6687, - "step": 8616 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0176507213450206e-05, - "loss": 0.6815, - "step": 8617 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0174512985308354e-05, - "loss": 0.784, - "step": 8618 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0172518750224077e-05, - "loss": 0.683, - "step": 8619 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0170524508276707e-05, - "loss": 0.6425, - "step": 8620 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0168530259545582e-05, - "loss": 0.6929, - "step": 8621 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0166536004110034e-05, - "loss": 0.7613, - "step": 8622 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0164541742049398e-05, - "loss": 0.5571, - "step": 8623 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.016254747344301e-05, - "loss": 0.7002, - "step": 8624 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0160553198370207e-05, - "loss": 0.8166, - "step": 8625 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0158558916910319e-05, - "loss": 0.6857, - "step": 8626 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0156564629142688e-05, - "loss": 0.6074, - "step": 8627 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.015457033514665e-05, - "loss": 0.6018, - "step": 8628 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0152576035001534e-05, - "loss": 0.5749, - "step": 8629 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0150581728786686e-05, - "loss": 0.6272, - "step": 8630 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0148587416581436e-05, - "loss": 0.5716, - "step": 8631 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0146593098465127e-05, - "loss": 0.6541, - "step": 8632 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0144598774517092e-05, - "loss": 0.5857, - "step": 8633 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0142604444816668e-05, - "loss": 0.6547, - "step": 8634 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0140610109443196e-05, - "loss": 0.5722, - "step": 8635 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0138615768476018e-05, - "loss": 0.6287, - "step": 8636 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0136621421994462e-05, - "loss": 0.6309, - "step": 8637 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0134627070077872e-05, - "loss": 0.6502, - "step": 8638 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0132632712805589e-05, - "loss": 0.5881, - "step": 8639 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0130638350256948e-05, - "loss": 0.5871, - "step": 8640 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0128643982511287e-05, - "loss": 0.6486, - "step": 8641 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0126649609647952e-05, - "loss": 0.5996, - "step": 8642 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0124655231746275e-05, - "loss": 0.6986, - "step": 8643 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0122660848885604e-05, - "loss": 0.5003, - "step": 8644 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.012066646114527e-05, - "loss": 0.6274, - "step": 8645 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.011867206860462e-05, - "loss": 0.5761, - "step": 8646 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0116677671342994e-05, - "loss": 0.5878, - "step": 8647 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0114683269439725e-05, - "loss": 0.707, - "step": 8648 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0112688862974161e-05, - "loss": 0.4873, - "step": 8649 - }, - { - "epoch": 1.56, - "grad_norm": 0.0, - "learning_rate": 1.0110694452025642e-05, - "loss": 0.6533, - "step": 8650 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0108700036673506e-05, - "loss": 0.5475, - "step": 8651 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.01067056169971e-05, - "loss": 0.636, - "step": 8652 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0104711193075756e-05, - "loss": 0.5851, - "step": 8653 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0102716764988827e-05, - "loss": 0.6559, - "step": 8654 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0100722332815649e-05, - "loss": 0.5488, - "step": 8655 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.009872789663556e-05, - "loss": 0.6653, - "step": 8656 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0096733456527907e-05, - "loss": 0.612, - "step": 8657 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0094739012572034e-05, - "loss": 0.6946, - "step": 8658 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0092744564847278e-05, - "loss": 0.6411, - "step": 8659 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0090750113432987e-05, - "loss": 0.6064, - "step": 8660 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0088755658408501e-05, - "loss": 0.6799, - "step": 8661 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0086761199853161e-05, - "loss": 0.6289, - "step": 8662 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0084766737846312e-05, - "loss": 0.6742, - "step": 8663 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0082772272467297e-05, - "loss": 0.6036, - "step": 8664 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0080777803795458e-05, - "loss": 0.5921, - "step": 8665 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0078783331910144e-05, - "loss": 0.5381, - "step": 8666 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0076788856890692e-05, - "loss": 0.628, - "step": 8667 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0074794378816445e-05, - "loss": 0.6109, - "step": 8668 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0072799897766751e-05, - "loss": 0.5652, - "step": 8669 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0070805413820954e-05, - "loss": 0.727, - "step": 8670 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0068810927058397e-05, - "loss": 0.5428, - "step": 8671 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0066816437558418e-05, - "loss": 0.659, - "step": 8672 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0064821945400372e-05, - "loss": 0.6549, - "step": 8673 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0062827450663597e-05, - "loss": 0.5345, - "step": 8674 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0060832953427437e-05, - "loss": 0.7175, - "step": 8675 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0058838453771239e-05, - "loss": 0.6064, - "step": 8676 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0056843951774347e-05, - "loss": 0.583, - "step": 8677 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0054849447516103e-05, - "loss": 0.6228, - "step": 8678 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0052854941075855e-05, - "loss": 0.7416, - "step": 8679 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0050860432532946e-05, - "loss": 0.6021, - "step": 8680 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0048865921966725e-05, - "loss": 0.5443, - "step": 8681 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0046871409456534e-05, - "loss": 0.6075, - "step": 8682 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0044876895081711e-05, - "loss": 0.6948, - "step": 8683 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0042882378921614e-05, - "loss": 0.6164, - "step": 8684 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0040887861055585e-05, - "loss": 0.6829, - "step": 8685 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0038893341562964e-05, - "loss": 0.6385, - "step": 8686 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0036898820523098e-05, - "loss": 0.6065, - "step": 8687 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0034904298015333e-05, - "loss": 0.638, - "step": 8688 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0032909774119018e-05, - "loss": 0.6593, - "step": 8689 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0030915248913495e-05, - "loss": 0.6418, - "step": 8690 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0028920722478109e-05, - "loss": 0.646, - "step": 8691 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0026926194892208e-05, - "loss": 0.6191, - "step": 8692 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0024931666235138e-05, - "loss": 0.6793, - "step": 8693 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0022937136586241e-05, - "loss": 0.6381, - "step": 8694 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0020942606024868e-05, - "loss": 0.6185, - "step": 8695 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0018948074630362e-05, - "loss": 0.6083, - "step": 8696 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0016953542482069e-05, - "loss": 0.6709, - "step": 8697 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0014959009659333e-05, - "loss": 0.7007, - "step": 8698 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0012964476241502e-05, - "loss": 0.6403, - "step": 8699 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0010969942307926e-05, - "loss": 0.6205, - "step": 8700 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0008975407937944e-05, - "loss": 0.6691, - "step": 8701 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0006980873210903e-05, - "loss": 0.71, - "step": 8702 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0004986338206153e-05, - "loss": 0.6324, - "step": 8703 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0002991803003039e-05, - "loss": 0.6412, - "step": 8704 - }, - { - "epoch": 1.57, - "grad_norm": 0.0, - "learning_rate": 1.0000997267680905e-05, - "loss": 0.5782, - "step": 8705 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.999002732319098e-06, - "loss": 0.5981, - "step": 8706 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.997008196996964e-06, - "loss": 0.7849, - "step": 8707 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.995013661793847e-06, - "loss": 0.5966, - "step": 8708 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.993019126789097e-06, - "loss": 0.6616, - "step": 8709 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.991024592062061e-06, - "loss": 0.6729, - "step": 8710 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.989030057692079e-06, - "loss": 0.6296, - "step": 8711 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.987035523758502e-06, - "loss": 0.5499, - "step": 8712 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.985040990340669e-06, - "loss": 0.6537, - "step": 8713 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.983046457517935e-06, - "loss": 0.7371, - "step": 8714 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.981051925369642e-06, - "loss": 0.5608, - "step": 8715 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.979057393975134e-06, - "loss": 0.5245, - "step": 8716 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.977062863413764e-06, - "loss": 0.6425, - "step": 8717 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.975068333764865e-06, - "loss": 0.6309, - "step": 8718 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.973073805107795e-06, - "loss": 0.6608, - "step": 8719 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.971079277521895e-06, - "loss": 0.5925, - "step": 8720 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.969084751086507e-06, - "loss": 0.6075, - "step": 8721 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.967090225880984e-06, - "loss": 0.7198, - "step": 8722 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.965095701984669e-06, - "loss": 0.6094, - "step": 8723 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.963101179476902e-06, - "loss": 0.5311, - "step": 8724 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.961106658437043e-06, - "loss": 0.5441, - "step": 8725 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.959112138944419e-06, - "loss": 0.6241, - "step": 8726 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.957117621078387e-06, - "loss": 0.6233, - "step": 8727 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.95512310491829e-06, - "loss": 0.692, - "step": 8728 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.953128590543471e-06, - "loss": 0.7356, - "step": 8729 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.951134078033278e-06, - "loss": 0.7128, - "step": 8730 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.949139567467054e-06, - "loss": 0.5859, - "step": 8731 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.947145058924145e-06, - "loss": 0.7387, - "step": 8732 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.945150552483902e-06, - "loss": 0.5947, - "step": 8733 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.943156048225658e-06, - "loss": 0.6337, - "step": 8734 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.941161546228765e-06, - "loss": 0.6592, - "step": 8735 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.939167046572566e-06, - "loss": 0.638, - "step": 8736 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.937172549336405e-06, - "loss": 0.6758, - "step": 8737 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.93517805459963e-06, - "loss": 0.5049, - "step": 8738 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.933183562441581e-06, - "loss": 0.6414, - "step": 8739 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.93118907294161e-06, - "loss": 0.6795, - "step": 8740 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.929194586179051e-06, - "loss": 0.6824, - "step": 8741 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.92720010223325e-06, - "loss": 0.6414, - "step": 8742 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.925205621183557e-06, - "loss": 0.6783, - "step": 8743 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.923211143109312e-06, - "loss": 0.5545, - "step": 8744 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.921216668089857e-06, - "loss": 0.5363, - "step": 8745 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.91922219620454e-06, - "loss": 0.6364, - "step": 8746 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.917227727532705e-06, - "loss": 0.7013, - "step": 8747 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.915233262153692e-06, - "loss": 0.5673, - "step": 8748 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.913238800146844e-06, - "loss": 0.5726, - "step": 8749 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.911244341591502e-06, - "loss": 0.6168, - "step": 8750 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.909249886567015e-06, - "loss": 0.6281, - "step": 8751 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.907255435152724e-06, - "loss": 0.6362, - "step": 8752 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.90526098742797e-06, - "loss": 0.561, - "step": 8753 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.903266543472093e-06, - "loss": 0.5673, - "step": 8754 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.901272103364443e-06, - "loss": 0.5316, - "step": 8755 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.899277667184357e-06, - "loss": 0.58, - "step": 8756 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.897283235011176e-06, - "loss": 0.5446, - "step": 8757 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.895288806924245e-06, - "loss": 0.7273, - "step": 8758 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.893294383002904e-06, - "loss": 0.5916, - "step": 8759 - }, - { - "epoch": 1.58, - "grad_norm": 0.0, - "learning_rate": 9.891299963326497e-06, - "loss": 0.7067, - "step": 8760 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.889305547974361e-06, - "loss": 0.5829, - "step": 8761 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.88731113702584e-06, - "loss": 0.602, - "step": 8762 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.885316730560281e-06, - "loss": 0.5857, - "step": 8763 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.883322328657013e-06, - "loss": 0.6508, - "step": 8764 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.881327931395384e-06, - "loss": 0.6186, - "step": 8765 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.879333538854733e-06, - "loss": 0.6494, - "step": 8766 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.877339151114399e-06, - "loss": 0.5882, - "step": 8767 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.875344768253725e-06, - "loss": 0.5968, - "step": 8768 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.873350390352051e-06, - "loss": 0.6328, - "step": 8769 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.871356017488716e-06, - "loss": 0.7219, - "step": 8770 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.869361649743059e-06, - "loss": 0.6143, - "step": 8771 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.867367287194416e-06, - "loss": 0.6318, - "step": 8772 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.865372929922132e-06, - "loss": 0.5844, - "step": 8773 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.863378578005542e-06, - "loss": 0.622, - "step": 8774 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.861384231523984e-06, - "loss": 0.6221, - "step": 8775 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.859389890556803e-06, - "loss": 0.6662, - "step": 8776 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.857395555183332e-06, - "loss": 0.7019, - "step": 8777 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.855401225482913e-06, - "loss": 0.6217, - "step": 8778 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.853406901534878e-06, - "loss": 0.6713, - "step": 8779 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.851412583418565e-06, - "loss": 0.6177, - "step": 8780 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.849418271213318e-06, - "loss": 0.6579, - "step": 8781 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.847423964998468e-06, - "loss": 0.5972, - "step": 8782 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.845429664853353e-06, - "loss": 0.6715, - "step": 8783 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.843435370857313e-06, - "loss": 0.7621, - "step": 8784 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.841441083089681e-06, - "loss": 0.5629, - "step": 8785 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.839446801629798e-06, - "loss": 0.6062, - "step": 8786 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.837452526556994e-06, - "loss": 0.6134, - "step": 8787 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.835458257950604e-06, - "loss": 0.5571, - "step": 8788 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.833463995889967e-06, - "loss": 0.5147, - "step": 8789 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.83146974045442e-06, - "loss": 0.6291, - "step": 8790 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.829475491723293e-06, - "loss": 0.5894, - "step": 8791 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.827481249775925e-06, - "loss": 0.7517, - "step": 8792 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.82548701469165e-06, - "loss": 0.5843, - "step": 8793 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.823492786549799e-06, - "loss": 0.7275, - "step": 8794 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.821498565429707e-06, - "loss": 0.6824, - "step": 8795 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.819504351410706e-06, - "loss": 0.6405, - "step": 8796 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.817510144572133e-06, - "loss": 0.4944, - "step": 8797 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.81551594499332e-06, - "loss": 0.5347, - "step": 8798 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.813521752753597e-06, - "loss": 0.6696, - "step": 8799 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.811527567932296e-06, - "loss": 0.6122, - "step": 8800 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.809533390608756e-06, - "loss": 0.6476, - "step": 8801 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.807539220862303e-06, - "loss": 0.584, - "step": 8802 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.80554505877227e-06, - "loss": 0.685, - "step": 8803 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.803550904417985e-06, - "loss": 0.7135, - "step": 8804 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.801556757878781e-06, - "loss": 0.6237, - "step": 8805 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.799562619233991e-06, - "loss": 0.5738, - "step": 8806 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.797568488562942e-06, - "loss": 0.5314, - "step": 8807 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.79557436594497e-06, - "loss": 0.5658, - "step": 8808 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.793580251459397e-06, - "loss": 0.6555, - "step": 8809 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.791586145185551e-06, - "loss": 0.632, - "step": 8810 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.789592047202771e-06, - "loss": 0.6381, - "step": 8811 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.787597957590377e-06, - "loss": 0.5751, - "step": 8812 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.7856038764277e-06, - "loss": 0.6323, - "step": 8813 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.783609803794069e-06, - "loss": 0.5525, - "step": 8814 - }, - { - "epoch": 1.59, - "grad_norm": 0.0, - "learning_rate": 9.781615739768809e-06, - "loss": 0.7322, - "step": 8815 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.779621684431254e-06, - "loss": 0.5651, - "step": 8816 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.777627637860722e-06, - "loss": 0.6089, - "step": 8817 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.775633600136543e-06, - "loss": 0.5566, - "step": 8818 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.773639571338044e-06, - "loss": 0.724, - "step": 8819 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.771645551544551e-06, - "loss": 0.5891, - "step": 8820 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.769651540835389e-06, - "loss": 0.7148, - "step": 8821 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.767657539289883e-06, - "loss": 0.6542, - "step": 8822 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.765663546987361e-06, - "loss": 0.6849, - "step": 8823 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.763669564007143e-06, - "loss": 0.6184, - "step": 8824 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.761675590428554e-06, - "loss": 0.557, - "step": 8825 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.759681626330917e-06, - "loss": 0.6885, - "step": 8826 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.75768767179356e-06, - "loss": 0.6399, - "step": 8827 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.755693726895799e-06, - "loss": 0.5433, - "step": 8828 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.75369979171696e-06, - "loss": 0.6025, - "step": 8829 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.751705866336366e-06, - "loss": 0.6723, - "step": 8830 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.749711950833341e-06, - "loss": 0.7546, - "step": 8831 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.747718045287202e-06, - "loss": 0.6014, - "step": 8832 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.74572414977727e-06, - "loss": 0.6005, - "step": 8833 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.743730264382866e-06, - "loss": 0.6307, - "step": 8834 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.741736389183313e-06, - "loss": 0.5694, - "step": 8835 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.739742524257928e-06, - "loss": 0.6742, - "step": 8836 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.737748669686028e-06, - "loss": 0.6856, - "step": 8837 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.73575482554694e-06, - "loss": 0.6151, - "step": 8838 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.73376099191998e-06, - "loss": 0.6673, - "step": 8839 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.73176716888446e-06, - "loss": 0.6066, - "step": 8840 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.729773356519702e-06, - "loss": 0.5257, - "step": 8841 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.72777955490502e-06, - "loss": 0.5548, - "step": 8842 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.725785764119738e-06, - "loss": 0.6729, - "step": 8843 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.723791984243164e-06, - "loss": 0.63, - "step": 8844 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.721798215354622e-06, - "loss": 0.6104, - "step": 8845 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.719804457533424e-06, - "loss": 0.6245, - "step": 8846 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.71781071085888e-06, - "loss": 0.6602, - "step": 8847 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.715816975410313e-06, - "loss": 0.6978, - "step": 8848 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.713823251267033e-06, - "loss": 0.6064, - "step": 8849 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.711829538508354e-06, - "loss": 0.5701, - "step": 8850 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.709835837213586e-06, - "loss": 0.6935, - "step": 8851 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.70784214746205e-06, - "loss": 0.5565, - "step": 8852 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.705848469333054e-06, - "loss": 0.6569, - "step": 8853 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.70385480290591e-06, - "loss": 0.5762, - "step": 8854 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.701861148259928e-06, - "loss": 0.7505, - "step": 8855 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.69986750547442e-06, - "loss": 0.6203, - "step": 8856 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.697873874628696e-06, - "loss": 0.6468, - "step": 8857 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.695880255802066e-06, - "loss": 0.6374, - "step": 8858 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.693886649073842e-06, - "loss": 0.705, - "step": 8859 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.691893054523332e-06, - "loss": 0.6546, - "step": 8860 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.689899472229846e-06, - "loss": 0.6559, - "step": 8861 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.687905902272687e-06, - "loss": 0.7587, - "step": 8862 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.685912344731167e-06, - "loss": 0.6762, - "step": 8863 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.683918799684591e-06, - "loss": 0.6749, - "step": 8864 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.681925267212267e-06, - "loss": 0.59, - "step": 8865 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.679931747393502e-06, - "loss": 0.6384, - "step": 8866 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.677938240307597e-06, - "loss": 0.6287, - "step": 8867 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.675944746033864e-06, - "loss": 0.8586, - "step": 8868 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.673951264651606e-06, - "loss": 0.7194, - "step": 8869 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.671957796240123e-06, - "loss": 0.6142, - "step": 8870 - }, - { - "epoch": 1.6, - "grad_norm": 0.0, - "learning_rate": 9.669964340878718e-06, - "loss": 0.6487, - "step": 8871 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.6679708986467e-06, - "loss": 0.7226, - "step": 8872 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.665977469623366e-06, - "loss": 0.5673, - "step": 8873 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.663984053888023e-06, - "loss": 0.6664, - "step": 8874 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.661990651519966e-06, - "loss": 0.4962, - "step": 8875 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.659997262598509e-06, - "loss": 0.644, - "step": 8876 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.658003887202935e-06, - "loss": 0.5688, - "step": 8877 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.656010525412556e-06, - "loss": 0.5202, - "step": 8878 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.654017177306667e-06, - "loss": 0.6396, - "step": 8879 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.652023842964563e-06, - "loss": 0.6418, - "step": 8880 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.65003052246555e-06, - "loss": 0.6775, - "step": 8881 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.648037215888923e-06, - "loss": 0.6056, - "step": 8882 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.646043923313976e-06, - "loss": 0.5602, - "step": 8883 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.644050644820016e-06, - "loss": 0.5516, - "step": 8884 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.642057380486323e-06, - "loss": 0.5536, - "step": 8885 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.640064130392202e-06, - "loss": 0.642, - "step": 8886 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.638070894616949e-06, - "loss": 0.6553, - "step": 8887 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.636077673239852e-06, - "loss": 0.6278, - "step": 8888 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.63408446634021e-06, - "loss": 0.7007, - "step": 8889 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.632091273997317e-06, - "loss": 0.6112, - "step": 8890 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.630098096290462e-06, - "loss": 0.7051, - "step": 8891 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.628104933298941e-06, - "loss": 0.5596, - "step": 8892 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.626111785102038e-06, - "loss": 0.6527, - "step": 8893 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.62411865177905e-06, - "loss": 0.7586, - "step": 8894 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.622125533409268e-06, - "loss": 0.5597, - "step": 8895 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.620132430071979e-06, - "loss": 0.561, - "step": 8896 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.61813934184647e-06, - "loss": 0.5627, - "step": 8897 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.616146268812035e-06, - "loss": 0.6308, - "step": 8898 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.61415321104796e-06, - "loss": 0.616, - "step": 8899 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.612160168633531e-06, - "loss": 0.6497, - "step": 8900 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.610167141648034e-06, - "loss": 0.592, - "step": 8901 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.608174130170752e-06, - "loss": 0.7647, - "step": 8902 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.606181134280978e-06, - "loss": 0.6204, - "step": 8903 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.604188154057994e-06, - "loss": 0.5882, - "step": 8904 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.60219518958108e-06, - "loss": 0.653, - "step": 8905 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.600202240929525e-06, - "loss": 0.5727, - "step": 8906 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.598209308182613e-06, - "loss": 0.6327, - "step": 8907 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.596216391419619e-06, - "loss": 0.6979, - "step": 8908 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.594223490719829e-06, - "loss": 0.7344, - "step": 8909 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.592230606162523e-06, - "loss": 0.6124, - "step": 8910 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.590237737826983e-06, - "loss": 0.6844, - "step": 8911 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.588244885792488e-06, - "loss": 0.7018, - "step": 8912 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.586252050138315e-06, - "loss": 0.73, - "step": 8913 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.584259230943752e-06, - "loss": 0.5502, - "step": 8914 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.58226642828806e-06, - "loss": 0.7097, - "step": 8915 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.580273642250528e-06, - "loss": 0.6262, - "step": 8916 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.57828087291043e-06, - "loss": 0.6924, - "step": 8917 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.57628812034704e-06, - "loss": 0.6386, - "step": 8918 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.574295384639635e-06, - "loss": 0.6325, - "step": 8919 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.57230266586749e-06, - "loss": 0.7086, - "step": 8920 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.570309964109874e-06, - "loss": 0.6548, - "step": 8921 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.56831727944607e-06, - "loss": 0.6788, - "step": 8922 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.56632461195534e-06, - "loss": 0.6289, - "step": 8923 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.56433196171696e-06, - "loss": 0.6696, - "step": 8924 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.562339328810203e-06, - "loss": 0.6229, - "step": 8925 - }, - { - "epoch": 1.61, - "grad_norm": 0.0, - "learning_rate": 9.560346713314332e-06, - "loss": 0.5603, - "step": 8926 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.558354115308627e-06, - "loss": 0.5875, - "step": 8927 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.55636153487235e-06, - "loss": 0.6555, - "step": 8928 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.554368972084774e-06, - "loss": 0.6275, - "step": 8929 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.552376427025161e-06, - "loss": 0.7649, - "step": 8930 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.55038389977278e-06, - "loss": 0.5946, - "step": 8931 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.5483913904069e-06, - "loss": 0.6586, - "step": 8932 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.546398899006782e-06, - "loss": 0.5859, - "step": 8933 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.54440642565169e-06, - "loss": 0.725, - "step": 8934 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.542413970420897e-06, - "loss": 0.5509, - "step": 8935 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.540421533393657e-06, - "loss": 0.6105, - "step": 8936 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.53842911464924e-06, - "loss": 0.7291, - "step": 8937 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.536436714266898e-06, - "loss": 0.5981, - "step": 8938 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.534444332325901e-06, - "loss": 0.6381, - "step": 8939 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.532451968905503e-06, - "loss": 0.6057, - "step": 8940 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.530459624084967e-06, - "loss": 0.6257, - "step": 8941 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.528467297943553e-06, - "loss": 0.5894, - "step": 8942 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.526474990560515e-06, - "loss": 0.6755, - "step": 8943 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.524482702015117e-06, - "loss": 0.5849, - "step": 8944 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.522490432386611e-06, - "loss": 0.6133, - "step": 8945 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.520498181754254e-06, - "loss": 0.639, - "step": 8946 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.5185059501973e-06, - "loss": 0.6517, - "step": 8947 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.516513737795001e-06, - "loss": 0.6519, - "step": 8948 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.514521544626618e-06, - "loss": 0.657, - "step": 8949 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.512529370771397e-06, - "loss": 0.674, - "step": 8950 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.510537216308592e-06, - "loss": 0.571, - "step": 8951 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.508545081317462e-06, - "loss": 0.5931, - "step": 8952 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.506552965877243e-06, - "loss": 0.573, - "step": 8953 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.504560870067194e-06, - "loss": 0.638, - "step": 8954 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.502568793966564e-06, - "loss": 0.6561, - "step": 8955 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.500576737654596e-06, - "loss": 0.7101, - "step": 8956 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.498584701210544e-06, - "loss": 0.7038, - "step": 8957 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.496592684713652e-06, - "loss": 0.6489, - "step": 8958 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.494600688243162e-06, - "loss": 0.6375, - "step": 8959 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.492608711878329e-06, - "loss": 0.6585, - "step": 8960 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.490616755698385e-06, - "loss": 0.624, - "step": 8961 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.488624819782581e-06, - "loss": 0.6327, - "step": 8962 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.486632904210157e-06, - "loss": 0.6901, - "step": 8963 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.484641009060353e-06, - "loss": 0.6285, - "step": 8964 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.482649134412416e-06, - "loss": 0.644, - "step": 8965 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.480657280345582e-06, - "loss": 0.6372, - "step": 8966 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.478665446939093e-06, - "loss": 0.6461, - "step": 8967 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.476673634272186e-06, - "loss": 0.6696, - "step": 8968 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.474681842424093e-06, - "loss": 0.7917, - "step": 8969 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.47269007147406e-06, - "loss": 0.5974, - "step": 8970 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.470698321501318e-06, - "loss": 0.6711, - "step": 8971 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.468706592585103e-06, - "loss": 0.5297, - "step": 8972 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.46671488480465e-06, - "loss": 0.5878, - "step": 8973 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.464723198239193e-06, - "loss": 0.6415, - "step": 8974 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.462731532967969e-06, - "loss": 0.6516, - "step": 8975 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.4607398890702e-06, - "loss": 0.6395, - "step": 8976 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.45874826662512e-06, - "loss": 0.6089, - "step": 8977 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.456756665711964e-06, - "loss": 0.6315, - "step": 8978 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.45476508640996e-06, - "loss": 0.5598, - "step": 8979 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.452773528798332e-06, - "loss": 0.5943, - "step": 8980 - }, - { - "epoch": 1.62, - "grad_norm": 0.0, - "learning_rate": 9.450781992956311e-06, - "loss": 0.5847, - "step": 8981 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.448790478963126e-06, - "loss": 0.6507, - "step": 8982 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.446798986897997e-06, - "loss": 0.658, - "step": 8983 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.444807516840154e-06, - "loss": 0.613, - "step": 8984 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.442816068868818e-06, - "loss": 0.7455, - "step": 8985 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.44082464306321e-06, - "loss": 0.7785, - "step": 8986 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.43883323950256e-06, - "loss": 0.6147, - "step": 8987 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.436841858266084e-06, - "loss": 0.6693, - "step": 8988 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.434850499433001e-06, - "loss": 0.6037, - "step": 8989 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.43285916308254e-06, - "loss": 0.6505, - "step": 8990 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.430867849293908e-06, - "loss": 0.7153, - "step": 8991 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.428876558146328e-06, - "loss": 0.5483, - "step": 8992 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.426885289719019e-06, - "loss": 0.6923, - "step": 8993 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.424894044091191e-06, - "loss": 0.558, - "step": 8994 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.422902821342067e-06, - "loss": 0.6074, - "step": 8995 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.420911621550856e-06, - "loss": 0.745, - "step": 8996 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.41892044479677e-06, - "loss": 0.6332, - "step": 8997 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.416929291159031e-06, - "loss": 0.6167, - "step": 8998 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.414938160716839e-06, - "loss": 0.5823, - "step": 8999 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.41294705354941e-06, - "loss": 0.6742, - "step": 9000 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.410955969735953e-06, - "loss": 0.6112, - "step": 9001 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.408964909355674e-06, - "loss": 0.6579, - "step": 9002 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.406973872487788e-06, - "loss": 0.5915, - "step": 9003 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.404982859211493e-06, - "loss": 0.584, - "step": 9004 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.402991869606004e-06, - "loss": 0.6068, - "step": 9005 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.40100090375052e-06, - "loss": 0.5691, - "step": 9006 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.399009961724241e-06, - "loss": 0.5872, - "step": 9007 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.39701904360638e-06, - "loss": 0.6174, - "step": 9008 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.395028149476134e-06, - "loss": 0.5653, - "step": 9009 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.3930372794127e-06, - "loss": 0.6368, - "step": 9010 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.391046433495287e-06, - "loss": 0.6593, - "step": 9011 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.389055611803092e-06, - "loss": 0.6281, - "step": 9012 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.387064814415311e-06, - "loss": 0.5673, - "step": 9013 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.385074041411139e-06, - "loss": 0.6095, - "step": 9014 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.383083292869775e-06, - "loss": 0.5751, - "step": 9015 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.381092568870416e-06, - "loss": 0.7227, - "step": 9016 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.379101869492254e-06, - "loss": 0.7044, - "step": 9017 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.377111194814482e-06, - "loss": 0.5819, - "step": 9018 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.375120544916296e-06, - "loss": 0.5644, - "step": 9019 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.373129919876886e-06, - "loss": 0.6317, - "step": 9020 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.37113931977544e-06, - "loss": 0.6435, - "step": 9021 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.36914874469115e-06, - "loss": 0.7062, - "step": 9022 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.3671581947032e-06, - "loss": 0.5718, - "step": 9023 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.365167669890785e-06, - "loss": 0.5529, - "step": 9024 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.363177170333087e-06, - "loss": 0.591, - "step": 9025 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.36118669610929e-06, - "loss": 0.6439, - "step": 9026 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.359196247298581e-06, - "loss": 0.6373, - "step": 9027 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.357205823980147e-06, - "loss": 0.6495, - "step": 9028 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.355215426233165e-06, - "loss": 0.6257, - "step": 9029 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.353225054136817e-06, - "loss": 0.6364, - "step": 9030 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.351234707770284e-06, - "loss": 0.7846, - "step": 9031 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.349244387212744e-06, - "loss": 0.6186, - "step": 9032 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.34725409254338e-06, - "loss": 0.6237, - "step": 9033 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.345263823841365e-06, - "loss": 0.6185, - "step": 9034 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.343273581185879e-06, - "loss": 0.7249, - "step": 9035 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.341283364656095e-06, - "loss": 0.6211, - "step": 9036 - }, - { - "epoch": 1.63, - "grad_norm": 0.0, - "learning_rate": 9.339293174331183e-06, - "loss": 0.6588, - "step": 9037 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.337303010290321e-06, - "loss": 0.6175, - "step": 9038 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.335312872612683e-06, - "loss": 0.6727, - "step": 9039 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.333322761377434e-06, - "loss": 0.756, - "step": 9040 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.331332676663751e-06, - "loss": 0.5537, - "step": 9041 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.3293426185508e-06, - "loss": 0.5756, - "step": 9042 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.32735258711775e-06, - "loss": 0.6802, - "step": 9043 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.325362582443763e-06, - "loss": 0.5944, - "step": 9044 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.323372604608008e-06, - "loss": 0.6989, - "step": 9045 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.321382653689652e-06, - "loss": 0.6164, - "step": 9046 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.319392729767855e-06, - "loss": 0.6812, - "step": 9047 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.317402832921779e-06, - "loss": 0.6127, - "step": 9048 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.31541296323059e-06, - "loss": 0.5665, - "step": 9049 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.313423120773448e-06, - "loss": 0.5764, - "step": 9050 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.311433305629512e-06, - "loss": 0.5192, - "step": 9051 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.309443517877935e-06, - "loss": 0.6515, - "step": 9052 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.307453757597876e-06, - "loss": 0.5357, - "step": 9053 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.305464024868495e-06, - "loss": 0.5792, - "step": 9054 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.303474319768946e-06, - "loss": 0.6823, - "step": 9055 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.30148464237838e-06, - "loss": 0.5471, - "step": 9056 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.299494992775952e-06, - "loss": 0.7218, - "step": 9057 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.297505371040816e-06, - "loss": 0.5996, - "step": 9058 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.295515777252119e-06, - "loss": 0.6622, - "step": 9059 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.293526211489009e-06, - "loss": 0.6309, - "step": 9060 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.291536673830636e-06, - "loss": 0.5551, - "step": 9061 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.28954716435615e-06, - "loss": 0.5972, - "step": 9062 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.287557683144694e-06, - "loss": 0.6268, - "step": 9063 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.285568230275411e-06, - "loss": 0.606, - "step": 9064 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.28357880582745e-06, - "loss": 0.4844, - "step": 9065 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.281589409879956e-06, - "loss": 0.5607, - "step": 9066 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.279600042512062e-06, - "loss": 0.6454, - "step": 9067 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.277610703802911e-06, - "loss": 0.7655, - "step": 9068 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.275621393831644e-06, - "loss": 0.5525, - "step": 9069 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.273632112677401e-06, - "loss": 0.7188, - "step": 9070 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.271642860419315e-06, - "loss": 0.6711, - "step": 9071 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.269653637136525e-06, - "loss": 0.6139, - "step": 9072 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.267664442908166e-06, - "loss": 0.5322, - "step": 9073 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.265675277813368e-06, - "loss": 0.6645, - "step": 9074 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.263686141931266e-06, - "loss": 0.6875, - "step": 9075 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.26169703534099e-06, - "loss": 0.6464, - "step": 9076 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.25970795812167e-06, - "loss": 0.6639, - "step": 9077 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.257718910352435e-06, - "loss": 0.6364, - "step": 9078 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.255729892112415e-06, - "loss": 0.6248, - "step": 9079 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.253740903480734e-06, - "loss": 0.6407, - "step": 9080 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.251751944536522e-06, - "loss": 0.4995, - "step": 9081 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.249763015358897e-06, - "loss": 0.6554, - "step": 9082 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.247774116026982e-06, - "loss": 0.5218, - "step": 9083 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.245785246619903e-06, - "loss": 0.6362, - "step": 9084 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.24379640721678e-06, - "loss": 0.6029, - "step": 9085 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.241807597896728e-06, - "loss": 0.6483, - "step": 9086 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.239818818738872e-06, - "loss": 0.6039, - "step": 9087 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.237830069822327e-06, - "loss": 0.6287, - "step": 9088 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.235841351226206e-06, - "loss": 0.6131, - "step": 9089 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.233852663029624e-06, - "loss": 0.6499, - "step": 9090 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.231864005311695e-06, - "loss": 0.6107, - "step": 9091 - }, - { - "epoch": 1.64, - "grad_norm": 0.0, - "learning_rate": 9.229875378151533e-06, - "loss": 0.6138, - "step": 9092 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.227886781628246e-06, - "loss": 0.5776, - "step": 9093 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.225898215820946e-06, - "loss": 0.5599, - "step": 9094 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.223909680808743e-06, - "loss": 0.57, - "step": 9095 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.221921176670744e-06, - "loss": 0.5533, - "step": 9096 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.21993270348605e-06, - "loss": 0.72, - "step": 9097 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.217944261333771e-06, - "loss": 0.5486, - "step": 9098 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.215955850293007e-06, - "loss": 0.7435, - "step": 9099 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.213967470442865e-06, - "loss": 0.5696, - "step": 9100 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.211979121862442e-06, - "loss": 0.6344, - "step": 9101 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.209990804630838e-06, - "loss": 0.6742, - "step": 9102 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.208002518827155e-06, - "loss": 0.6521, - "step": 9103 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.206014264530491e-06, - "loss": 0.6744, - "step": 9104 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.204026041819937e-06, - "loss": 0.7163, - "step": 9105 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.202037850774592e-06, - "loss": 0.672, - "step": 9106 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.200049691473545e-06, - "loss": 0.634, - "step": 9107 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.198061563995894e-06, - "loss": 0.5653, - "step": 9108 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.196073468420729e-06, - "loss": 0.6509, - "step": 9109 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.194085404827135e-06, - "loss": 0.6127, - "step": 9110 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.192097373294211e-06, - "loss": 0.5705, - "step": 9111 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.19010937390103e-06, - "loss": 0.5877, - "step": 9112 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.18812140672669e-06, - "loss": 0.5943, - "step": 9113 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.186133471850271e-06, - "loss": 0.6461, - "step": 9114 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.184145569350853e-06, - "loss": 0.5087, - "step": 9115 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.182157699307525e-06, - "loss": 0.6008, - "step": 9116 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.180169861799363e-06, - "loss": 0.5963, - "step": 9117 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.178182056905451e-06, - "loss": 0.6893, - "step": 9118 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.176194284704864e-06, - "loss": 0.6463, - "step": 9119 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.174206545276678e-06, - "loss": 0.678, - "step": 9120 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.172218838699972e-06, - "loss": 0.6658, - "step": 9121 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.170231165053818e-06, - "loss": 0.6097, - "step": 9122 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.16824352441729e-06, - "loss": 0.6293, - "step": 9123 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.166255916869456e-06, - "loss": 0.6734, - "step": 9124 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.164268342489394e-06, - "loss": 0.63, - "step": 9125 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.16228080135617e-06, - "loss": 0.5762, - "step": 9126 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.160293293548848e-06, - "loss": 0.6222, - "step": 9127 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.158305819146499e-06, - "loss": 0.5752, - "step": 9128 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.156318378228183e-06, - "loss": 0.6749, - "step": 9129 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.15433097087297e-06, - "loss": 0.6768, - "step": 9130 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.15234359715992e-06, - "loss": 0.6144, - "step": 9131 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.150356257168091e-06, - "loss": 0.6014, - "step": 9132 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.148368950976549e-06, - "loss": 0.5327, - "step": 9133 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.146381678664348e-06, - "loss": 0.7347, - "step": 9134 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.144394440310547e-06, - "loss": 0.5876, - "step": 9135 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.1424072359942e-06, - "loss": 0.6347, - "step": 9136 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.140420065794361e-06, - "loss": 0.6263, - "step": 9137 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.138432929790085e-06, - "loss": 0.6628, - "step": 9138 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.136445828060423e-06, - "loss": 0.5686, - "step": 9139 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.134458760684425e-06, - "loss": 0.6534, - "step": 9140 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.132471727741145e-06, - "loss": 0.6508, - "step": 9141 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.130484729309621e-06, - "loss": 0.5957, - "step": 9142 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.128497765468905e-06, - "loss": 0.63, - "step": 9143 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.12651083629804e-06, - "loss": 0.6394, - "step": 9144 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.12452394187607e-06, - "loss": 0.5914, - "step": 9145 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.122537082282038e-06, - "loss": 0.793, - "step": 9146 - }, - { - "epoch": 1.65, - "grad_norm": 0.0, - "learning_rate": 9.120550257594985e-06, - "loss": 0.5912, - "step": 9147 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.118563467893948e-06, - "loss": 0.5405, - "step": 9148 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.11657671325797e-06, - "loss": 0.5178, - "step": 9149 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.11458999376608e-06, - "loss": 0.6832, - "step": 9150 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.112603309497317e-06, - "loss": 0.6202, - "step": 9151 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.110616660530715e-06, - "loss": 0.7108, - "step": 9152 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.108630046945305e-06, - "loss": 0.6111, - "step": 9153 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.10664346882012e-06, - "loss": 0.6533, - "step": 9154 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.104656926234188e-06, - "loss": 0.6852, - "step": 9155 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.102670419266535e-06, - "loss": 0.5709, - "step": 9156 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.100683947996197e-06, - "loss": 0.6051, - "step": 9157 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.098697512502186e-06, - "loss": 0.6602, - "step": 9158 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.096711112863532e-06, - "loss": 0.6668, - "step": 9159 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.09472474915926e-06, - "loss": 0.5918, - "step": 9160 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.092738421468384e-06, - "loss": 0.6355, - "step": 9161 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.090752129869932e-06, - "loss": 0.6101, - "step": 9162 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.088765874442916e-06, - "loss": 0.5849, - "step": 9163 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.086779655266358e-06, - "loss": 0.5581, - "step": 9164 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.084793472419268e-06, - "loss": 0.6701, - "step": 9165 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.082807325980658e-06, - "loss": 0.6012, - "step": 9166 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.080821216029546e-06, - "loss": 0.6497, - "step": 9167 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.078835142644942e-06, - "loss": 0.5971, - "step": 9168 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.076849105905852e-06, - "loss": 0.6539, - "step": 9169 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.074863105891286e-06, - "loss": 0.5705, - "step": 9170 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.072877142680252e-06, - "loss": 0.613, - "step": 9171 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.070891216351754e-06, - "loss": 0.6925, - "step": 9172 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.068905326984797e-06, - "loss": 0.6499, - "step": 9173 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.066919474658376e-06, - "loss": 0.6105, - "step": 9174 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.064933659451498e-06, - "loss": 0.634, - "step": 9175 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.062947881443162e-06, - "loss": 0.6095, - "step": 9176 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.060962140712365e-06, - "loss": 0.6018, - "step": 9177 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.0589764373381e-06, - "loss": 0.6079, - "step": 9178 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.05699077139937e-06, - "loss": 0.5518, - "step": 9179 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.055005142975158e-06, - "loss": 0.5631, - "step": 9180 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.053019552144462e-06, - "loss": 0.6807, - "step": 9181 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.05103399898627e-06, - "loss": 0.6125, - "step": 9182 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.049048483579569e-06, - "loss": 0.7854, - "step": 9183 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.04706300600335e-06, - "loss": 0.6205, - "step": 9184 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.0450775663366e-06, - "loss": 0.6062, - "step": 9185 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.043092164658293e-06, - "loss": 0.596, - "step": 9186 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.041106801047429e-06, - "loss": 0.5934, - "step": 9187 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.039121475582973e-06, - "loss": 0.5946, - "step": 9188 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.03713618834391e-06, - "loss": 0.6328, - "step": 9189 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.035150939409221e-06, - "loss": 0.5968, - "step": 9190 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.03316572885788e-06, - "loss": 0.748, - "step": 9191 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.031180556768862e-06, - "loss": 0.6268, - "step": 9192 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.029195423221142e-06, - "loss": 0.6652, - "step": 9193 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.027210328293693e-06, - "loss": 0.6989, - "step": 9194 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.025225272065481e-06, - "loss": 0.6873, - "step": 9195 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.023240254615478e-06, - "loss": 0.6007, - "step": 9196 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.021255276022652e-06, - "loss": 0.604, - "step": 9197 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.019270336365967e-06, - "loss": 0.6435, - "step": 9198 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.017285435724385e-06, - "loss": 0.6885, - "step": 9199 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.015300574176875e-06, - "loss": 0.5881, - "step": 9200 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.013315751802395e-06, - "loss": 0.5909, - "step": 9201 - }, - { - "epoch": 1.66, - "grad_norm": 0.0, - "learning_rate": 9.011330968679906e-06, - "loss": 0.6326, - "step": 9202 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 9.009346224888365e-06, - "loss": 0.572, - "step": 9203 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 9.007361520506724e-06, - "loss": 0.6011, - "step": 9204 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 9.005376855613946e-06, - "loss": 0.5892, - "step": 9205 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 9.003392230288979e-06, - "loss": 0.6628, - "step": 9206 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 9.001407644610774e-06, - "loss": 0.6363, - "step": 9207 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.999423098658286e-06, - "loss": 0.8227, - "step": 9208 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.997438592510462e-06, - "loss": 0.7042, - "step": 9209 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.99545412624625e-06, - "loss": 0.6181, - "step": 9210 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.993469699944593e-06, - "loss": 0.5655, - "step": 9211 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.991485313684431e-06, - "loss": 0.7089, - "step": 9212 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.989500967544715e-06, - "loss": 0.6254, - "step": 9213 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.987516661604381e-06, - "loss": 0.5139, - "step": 9214 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.98553239594237e-06, - "loss": 0.5531, - "step": 9215 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.983548170637612e-06, - "loss": 0.718, - "step": 9216 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.981563985769057e-06, - "loss": 0.6154, - "step": 9217 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.979579841415628e-06, - "loss": 0.6562, - "step": 9218 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.977595737656262e-06, - "loss": 0.6532, - "step": 9219 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.975611674569888e-06, - "loss": 0.5752, - "step": 9220 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.973627652235434e-06, - "loss": 0.6131, - "step": 9221 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.971643670731831e-06, - "loss": 0.6667, - "step": 9222 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.969659730138007e-06, - "loss": 0.6669, - "step": 9223 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.967675830532881e-06, - "loss": 0.6647, - "step": 9224 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.965691971995386e-06, - "loss": 0.5993, - "step": 9225 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.963708154604428e-06, - "loss": 0.5361, - "step": 9226 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.96172437843894e-06, - "loss": 0.6468, - "step": 9227 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.959740643577833e-06, - "loss": 0.6215, - "step": 9228 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.957756950100024e-06, - "loss": 0.5491, - "step": 9229 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.955773298084429e-06, - "loss": 0.6304, - "step": 9230 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.953789687609963e-06, - "loss": 0.604, - "step": 9231 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.951806118755536e-06, - "loss": 0.5765, - "step": 9232 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.949822591600057e-06, - "loss": 0.6654, - "step": 9233 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.947839106222432e-06, - "loss": 0.6008, - "step": 9234 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.945855662701572e-06, - "loss": 0.6754, - "step": 9235 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.943872261116379e-06, - "loss": 0.6488, - "step": 9236 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.941888901545754e-06, - "loss": 0.589, - "step": 9237 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.939905584068605e-06, - "loss": 0.7828, - "step": 9238 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.937922308763826e-06, - "loss": 0.5065, - "step": 9239 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.935939075710319e-06, - "loss": 0.6607, - "step": 9240 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.933955884986977e-06, - "loss": 0.6359, - "step": 9241 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.931972736672694e-06, - "loss": 0.6681, - "step": 9242 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.929989630846369e-06, - "loss": 0.6864, - "step": 9243 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.928006567586886e-06, - "loss": 0.6473, - "step": 9244 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.926023546973139e-06, - "loss": 0.5922, - "step": 9245 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.924040569084016e-06, - "loss": 0.677, - "step": 9246 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.922057633998406e-06, - "loss": 0.6072, - "step": 9247 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.920074741795185e-06, - "loss": 0.6442, - "step": 9248 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.918091892553244e-06, - "loss": 0.5899, - "step": 9249 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.916109086351457e-06, - "loss": 0.595, - "step": 9250 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.914126323268709e-06, - "loss": 0.6237, - "step": 9251 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.912143603383876e-06, - "loss": 0.4914, - "step": 9252 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.910160926775833e-06, - "loss": 0.6208, - "step": 9253 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.908178293523458e-06, - "loss": 0.6921, - "step": 9254 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.90619570370562e-06, - "loss": 0.6398, - "step": 9255 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.90421315740119e-06, - "loss": 0.6446, - "step": 9256 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.90223065468904e-06, - "loss": 0.6202, - "step": 9257 - }, - { - "epoch": 1.67, - "grad_norm": 0.0, - "learning_rate": 8.90024819564803e-06, - "loss": 0.6143, - "step": 9258 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.898265780357036e-06, - "loss": 0.7127, - "step": 9259 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.896283408894915e-06, - "loss": 0.6571, - "step": 9260 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.89430108134053e-06, - "loss": 0.742, - "step": 9261 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.89231879777274e-06, - "loss": 0.5282, - "step": 9262 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.890336558270412e-06, - "loss": 0.6105, - "step": 9263 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.888354362912393e-06, - "loss": 0.6651, - "step": 9264 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.886372211777543e-06, - "loss": 0.5651, - "step": 9265 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.884390104944714e-06, - "loss": 0.6173, - "step": 9266 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.882408042492755e-06, - "loss": 0.6193, - "step": 9267 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.88042602450052e-06, - "loss": 0.6482, - "step": 9268 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.878444051046856e-06, - "loss": 0.5119, - "step": 9269 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.876462122210611e-06, - "loss": 0.6552, - "step": 9270 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.874480238070626e-06, - "loss": 0.6485, - "step": 9271 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.872498398705743e-06, - "loss": 0.6203, - "step": 9272 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.870516604194808e-06, - "loss": 0.6135, - "step": 9273 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.868534854616656e-06, - "loss": 0.644, - "step": 9274 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.866553150050125e-06, - "loss": 0.6906, - "step": 9275 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.864571490574053e-06, - "loss": 0.6756, - "step": 9276 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.862589876267272e-06, - "loss": 0.6062, - "step": 9277 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.860608307208618e-06, - "loss": 0.6566, - "step": 9278 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.858626783476912e-06, - "loss": 0.6369, - "step": 9279 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.856645305150987e-06, - "loss": 0.5402, - "step": 9280 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.854663872309673e-06, - "loss": 0.5514, - "step": 9281 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.852682485031792e-06, - "loss": 0.6994, - "step": 9282 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.850701143396167e-06, - "loss": 0.8177, - "step": 9283 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.84871984748162e-06, - "loss": 0.5512, - "step": 9284 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.84673859736697e-06, - "loss": 0.595, - "step": 9285 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.844757393131035e-06, - "loss": 0.7449, - "step": 9286 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.842776234852628e-06, - "loss": 0.617, - "step": 9287 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.840795122610563e-06, - "loss": 0.6237, - "step": 9288 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.838814056483658e-06, - "loss": 0.6518, - "step": 9289 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.836833036550717e-06, - "loss": 0.6853, - "step": 9290 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.834852062890549e-06, - "loss": 0.5216, - "step": 9291 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.832871135581962e-06, - "loss": 0.537, - "step": 9292 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.830890254703766e-06, - "loss": 0.5402, - "step": 9293 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.828909420334754e-06, - "loss": 0.6852, - "step": 9294 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.826928632553732e-06, - "loss": 0.5719, - "step": 9295 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.824947891439498e-06, - "loss": 0.6636, - "step": 9296 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.822967197070851e-06, - "loss": 0.6415, - "step": 9297 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.820986549526584e-06, - "loss": 0.6709, - "step": 9298 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.819005948885491e-06, - "loss": 0.6262, - "step": 9299 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.817025395226372e-06, - "loss": 0.6573, - "step": 9300 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.815044888628001e-06, - "loss": 0.7174, - "step": 9301 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.813064429169177e-06, - "loss": 0.5722, - "step": 9302 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.811084016928684e-06, - "loss": 0.6208, - "step": 9303 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.809103651985302e-06, - "loss": 0.6171, - "step": 9304 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.80712333441782e-06, - "loss": 0.5318, - "step": 9305 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.805143064305016e-06, - "loss": 0.6677, - "step": 9306 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.803162841725666e-06, - "loss": 0.6023, - "step": 9307 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.801182666758551e-06, - "loss": 0.5986, - "step": 9308 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.799202539482439e-06, - "loss": 0.6688, - "step": 9309 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.797222459976109e-06, - "loss": 0.6018, - "step": 9310 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.79524242831833e-06, - "loss": 0.724, - "step": 9311 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.793262444587874e-06, - "loss": 0.6428, - "step": 9312 - }, - { - "epoch": 1.68, - "grad_norm": 0.0, - "learning_rate": 8.791282508863499e-06, - "loss": 0.6518, - "step": 9313 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.78930262122398e-06, - "loss": 0.608, - "step": 9314 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.787322781748078e-06, - "loss": 0.735, - "step": 9315 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.785342990514554e-06, - "loss": 0.6232, - "step": 9316 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.783363247602164e-06, - "loss": 0.5914, - "step": 9317 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.781383553089667e-06, - "loss": 0.6189, - "step": 9318 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.779403907055822e-06, - "loss": 0.5498, - "step": 9319 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.77742430957938e-06, - "loss": 0.5742, - "step": 9320 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.775444760739092e-06, - "loss": 0.5473, - "step": 9321 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.773465260613712e-06, - "loss": 0.6691, - "step": 9322 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.771485809281986e-06, - "loss": 0.636, - "step": 9323 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.769506406822658e-06, - "loss": 0.5371, - "step": 9324 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.767527053314472e-06, - "loss": 0.6773, - "step": 9325 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.76554774883617e-06, - "loss": 0.5394, - "step": 9326 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.763568493466495e-06, - "loss": 0.7368, - "step": 9327 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.761589287284183e-06, - "loss": 0.6159, - "step": 9328 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.759610130367968e-06, - "loss": 0.5479, - "step": 9329 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.75763102279659e-06, - "loss": 0.6862, - "step": 9330 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.75565196464878e-06, - "loss": 0.5988, - "step": 9331 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.753672956003264e-06, - "loss": 0.6154, - "step": 9332 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.751693996938774e-06, - "loss": 0.5802, - "step": 9333 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.749715087534033e-06, - "loss": 0.6137, - "step": 9334 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.74773622786777e-06, - "loss": 0.6095, - "step": 9335 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.745757418018704e-06, - "loss": 0.7055, - "step": 9336 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.743778658065554e-06, - "loss": 0.5576, - "step": 9337 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.741799948087048e-06, - "loss": 0.6703, - "step": 9338 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.739821288161889e-06, - "loss": 0.5146, - "step": 9339 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.737842678368802e-06, - "loss": 0.6987, - "step": 9340 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.735864118786493e-06, - "loss": 0.618, - "step": 9341 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.733885609493674e-06, - "loss": 0.596, - "step": 9342 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.731907150569057e-06, - "loss": 0.6639, - "step": 9343 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.729928742091344e-06, - "loss": 0.6384, - "step": 9344 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.727950384139242e-06, - "loss": 0.5666, - "step": 9345 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.725972076791457e-06, - "loss": 0.6257, - "step": 9346 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.723993820126679e-06, - "loss": 0.7252, - "step": 9347 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.722015614223616e-06, - "loss": 0.5437, - "step": 9348 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.72003745916096e-06, - "loss": 0.69, - "step": 9349 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.718059355017405e-06, - "loss": 0.6561, - "step": 9350 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.716081301871649e-06, - "loss": 0.6569, - "step": 9351 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.714103299802375e-06, - "loss": 0.5414, - "step": 9352 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.71212534888828e-06, - "loss": 0.6671, - "step": 9353 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.71014744920804e-06, - "loss": 0.6687, - "step": 9354 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.708169600840342e-06, - "loss": 0.5876, - "step": 9355 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.706191803863874e-06, - "loss": 0.6684, - "step": 9356 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.704214058357313e-06, - "loss": 0.5651, - "step": 9357 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.702236364399335e-06, - "loss": 0.6512, - "step": 9358 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.700258722068615e-06, - "loss": 0.5958, - "step": 9359 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.698281131443834e-06, - "loss": 0.6731, - "step": 9360 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.696303592603659e-06, - "loss": 0.6251, - "step": 9361 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.69432610562676e-06, - "loss": 0.6425, - "step": 9362 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.692348670591804e-06, - "loss": 0.6641, - "step": 9363 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.690371287577457e-06, - "loss": 0.5967, - "step": 9364 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.688393956662386e-06, - "loss": 0.565, - "step": 9365 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.68641667792525e-06, - "loss": 0.6635, - "step": 9366 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.684439451444706e-06, - "loss": 0.6202, - "step": 9367 - }, - { - "epoch": 1.69, - "grad_norm": 0.0, - "learning_rate": 8.682462277299418e-06, - "loss": 0.6912, - "step": 9368 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.68048515556804e-06, - "loss": 0.5662, - "step": 9369 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.678508086329219e-06, - "loss": 0.6901, - "step": 9370 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.676531069661611e-06, - "loss": 0.61, - "step": 9371 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.674554105643863e-06, - "loss": 0.562, - "step": 9372 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.672577194354623e-06, - "loss": 0.6601, - "step": 9373 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.670600335872538e-06, - "loss": 0.5578, - "step": 9374 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.668623530276245e-06, - "loss": 0.7065, - "step": 9375 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.666646777644398e-06, - "loss": 0.6848, - "step": 9376 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.664670078055619e-06, - "loss": 0.6449, - "step": 9377 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.662693431588554e-06, - "loss": 0.5964, - "step": 9378 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.660716838321834e-06, - "loss": 0.5137, - "step": 9379 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.658740298334091e-06, - "loss": 0.6745, - "step": 9380 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.656763811703959e-06, - "loss": 0.5783, - "step": 9381 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.654787378510064e-06, - "loss": 0.6737, - "step": 9382 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.65281099883103e-06, - "loss": 0.6529, - "step": 9383 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.650834672745488e-06, - "loss": 0.5711, - "step": 9384 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.648858400332046e-06, - "loss": 0.6206, - "step": 9385 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.646882181669335e-06, - "loss": 0.6367, - "step": 9386 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.64490601683597e-06, - "loss": 0.6478, - "step": 9387 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.64292990591056e-06, - "loss": 0.6449, - "step": 9388 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.640953848971727e-06, - "loss": 0.584, - "step": 9389 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.638977846098079e-06, - "loss": 0.5942, - "step": 9390 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.637001897368224e-06, - "loss": 0.5906, - "step": 9391 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.635026002860768e-06, - "loss": 0.5438, - "step": 9392 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.633050162654312e-06, - "loss": 0.5691, - "step": 9393 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.631074376827467e-06, - "loss": 0.5687, - "step": 9394 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.629098645458826e-06, - "loss": 0.5893, - "step": 9395 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.627122968626991e-06, - "loss": 0.6829, - "step": 9396 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.625147346410555e-06, - "loss": 0.642, - "step": 9397 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.623171778888111e-06, - "loss": 0.6078, - "step": 9398 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.621196266138257e-06, - "loss": 0.5465, - "step": 9399 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.619220808239575e-06, - "loss": 0.7052, - "step": 9400 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.617245405270656e-06, - "loss": 0.4794, - "step": 9401 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.61527005731008e-06, - "loss": 0.7121, - "step": 9402 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.613294764436435e-06, - "loss": 0.7301, - "step": 9403 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.611319526728301e-06, - "loss": 0.5643, - "step": 9404 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.609344344264252e-06, - "loss": 0.6948, - "step": 9405 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.607369217122874e-06, - "loss": 0.5403, - "step": 9406 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.605394145382729e-06, - "loss": 0.7078, - "step": 9407 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.603419129122395e-06, - "loss": 0.729, - "step": 9408 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.60144416842044e-06, - "loss": 0.6335, - "step": 9409 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.599469263355432e-06, - "loss": 0.6627, - "step": 9410 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.597494414005934e-06, - "loss": 0.6975, - "step": 9411 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.595519620450514e-06, - "loss": 0.5346, - "step": 9412 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.593544882767728e-06, - "loss": 0.6013, - "step": 9413 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.59157020103614e-06, - "loss": 0.5287, - "step": 9414 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.589595575334298e-06, - "loss": 0.5479, - "step": 9415 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.58762100574076e-06, - "loss": 0.6388, - "step": 9416 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.58564649233408e-06, - "loss": 0.5386, - "step": 9417 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.583672035192802e-06, - "loss": 0.5548, - "step": 9418 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.581697634395478e-06, - "loss": 0.4932, - "step": 9419 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.579723290020654e-06, - "loss": 0.5577, - "step": 9420 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.577749002146868e-06, - "loss": 0.5961, - "step": 9421 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.575774770852668e-06, - "loss": 0.6426, - "step": 9422 - }, - { - "epoch": 1.7, - "grad_norm": 0.0, - "learning_rate": 8.573800596216582e-06, - "loss": 0.6346, - "step": 9423 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.571826478317155e-06, - "loss": 0.6087, - "step": 9424 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.569852417232915e-06, - "loss": 0.6788, - "step": 9425 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.567878413042396e-06, - "loss": 0.5657, - "step": 9426 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.565904465824126e-06, - "loss": 0.6067, - "step": 9427 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.563930575656637e-06, - "loss": 0.6266, - "step": 9428 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.561956742618449e-06, - "loss": 0.5834, - "step": 9429 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.559982966788083e-06, - "loss": 0.5471, - "step": 9430 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.558009248244062e-06, - "loss": 0.7247, - "step": 9431 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.556035587064903e-06, - "loss": 0.6393, - "step": 9432 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.554061983329124e-06, - "loss": 0.7166, - "step": 9433 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.552088437115233e-06, - "loss": 0.6066, - "step": 9434 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.550114948501746e-06, - "loss": 0.5994, - "step": 9435 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.54814151756717e-06, - "loss": 0.6445, - "step": 9436 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.546168144390016e-06, - "loss": 0.6323, - "step": 9437 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.54419482904878e-06, - "loss": 0.5649, - "step": 9438 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.542221571621967e-06, - "loss": 0.6137, - "step": 9439 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.540248372188077e-06, - "loss": 0.6402, - "step": 9440 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.538275230825608e-06, - "loss": 0.6518, - "step": 9441 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.536302147613056e-06, - "loss": 0.6429, - "step": 9442 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.534329122628909e-06, - "loss": 0.5763, - "step": 9443 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.532356155951664e-06, - "loss": 0.7523, - "step": 9444 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.530383247659802e-06, - "loss": 0.6217, - "step": 9445 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.528410397831814e-06, - "loss": 0.4939, - "step": 9446 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.52643760654618e-06, - "loss": 0.6951, - "step": 9447 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.524464873881382e-06, - "loss": 0.5907, - "step": 9448 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.522492199915901e-06, - "loss": 0.6604, - "step": 9449 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.52051958472821e-06, - "loss": 0.6897, - "step": 9450 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.518547028396783e-06, - "loss": 0.5752, - "step": 9451 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.516574531000099e-06, - "loss": 0.6696, - "step": 9452 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.514602092616616e-06, - "loss": 0.6191, - "step": 9453 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.512629713324808e-06, - "loss": 0.6587, - "step": 9454 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.510657393203138e-06, - "loss": 0.4955, - "step": 9455 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.508685132330066e-06, - "loss": 0.6413, - "step": 9456 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.506712930784057e-06, - "loss": 0.6267, - "step": 9457 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.504740788643566e-06, - "loss": 0.5713, - "step": 9458 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.502768705987047e-06, - "loss": 0.5913, - "step": 9459 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.500796682892955e-06, - "loss": 0.618, - "step": 9460 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.498824719439737e-06, - "loss": 0.6933, - "step": 9461 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.496852815705844e-06, - "loss": 0.6275, - "step": 9462 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.49488097176972e-06, - "loss": 0.6539, - "step": 9463 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.49290918770981e-06, - "loss": 0.6404, - "step": 9464 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.490937463604556e-06, - "loss": 0.6334, - "step": 9465 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.488965799532394e-06, - "loss": 0.6567, - "step": 9466 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.486994195571763e-06, - "loss": 0.5501, - "step": 9467 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.485022651801092e-06, - "loss": 0.5668, - "step": 9468 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.483051168298815e-06, - "loss": 0.6608, - "step": 9469 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.481079745143361e-06, - "loss": 0.5849, - "step": 9470 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.47910838241316e-06, - "loss": 0.666, - "step": 9471 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.477137080186629e-06, - "loss": 0.679, - "step": 9472 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.475165838542195e-06, - "loss": 0.5655, - "step": 9473 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.473194657558276e-06, - "loss": 0.5462, - "step": 9474 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.471223537313294e-06, - "loss": 0.64, - "step": 9475 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.469252477885654e-06, - "loss": 0.6132, - "step": 9476 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.46728147935377e-06, - "loss": 0.5865, - "step": 9477 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.465310541796059e-06, - "loss": 0.7377, - "step": 9478 - }, - { - "epoch": 1.71, - "grad_norm": 0.0, - "learning_rate": 8.463339665290923e-06, - "loss": 0.6047, - "step": 9479 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.461368849916765e-06, - "loss": 0.578, - "step": 9480 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.459398095751991e-06, - "loss": 0.5718, - "step": 9481 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.457427402875004e-06, - "loss": 0.6605, - "step": 9482 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.455456771364194e-06, - "loss": 0.6166, - "step": 9483 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.45348620129796e-06, - "loss": 0.6538, - "step": 9484 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.451515692754693e-06, - "loss": 0.5868, - "step": 9485 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.449545245812786e-06, - "loss": 0.6351, - "step": 9486 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.447574860550625e-06, - "loss": 0.5579, - "step": 9487 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.445604537046595e-06, - "loss": 0.6293, - "step": 9488 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.443634275379078e-06, - "loss": 0.6428, - "step": 9489 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.44166407562646e-06, - "loss": 0.5884, - "step": 9490 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.439693937867111e-06, - "loss": 0.639, - "step": 9491 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.437723862179413e-06, - "loss": 0.595, - "step": 9492 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.435753848641735e-06, - "loss": 0.6246, - "step": 9493 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.433783897332444e-06, - "loss": 0.6718, - "step": 9494 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.431814008329918e-06, - "loss": 0.7234, - "step": 9495 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.429844181712515e-06, - "loss": 0.5486, - "step": 9496 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.427874417558603e-06, - "loss": 0.6353, - "step": 9497 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.42590471594654e-06, - "loss": 0.6523, - "step": 9498 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.42393507695468e-06, - "loss": 0.6918, - "step": 9499 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.421965500661383e-06, - "loss": 0.5609, - "step": 9500 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.419995987145003e-06, - "loss": 0.5236, - "step": 9501 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.418026536483888e-06, - "loss": 0.5617, - "step": 9502 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.416057148756388e-06, - "loss": 0.6648, - "step": 9503 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.41408782404085e-06, - "loss": 0.648, - "step": 9504 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.412118562415616e-06, - "loss": 0.644, - "step": 9505 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.410149363959024e-06, - "loss": 0.6476, - "step": 9506 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.408180228749413e-06, - "loss": 0.6507, - "step": 9507 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.40621115686512e-06, - "loss": 0.6403, - "step": 9508 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.404242148384478e-06, - "loss": 0.6375, - "step": 9509 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.402273203385815e-06, - "loss": 0.7164, - "step": 9510 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.400304321947463e-06, - "loss": 0.5371, - "step": 9511 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.398335504147747e-06, - "loss": 0.6476, - "step": 9512 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.396366750064987e-06, - "loss": 0.6985, - "step": 9513 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.394398059777505e-06, - "loss": 0.6528, - "step": 9514 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.392429433363615e-06, - "loss": 0.6601, - "step": 9515 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.39046087090164e-06, - "loss": 0.674, - "step": 9516 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.38849237246989e-06, - "loss": 0.6015, - "step": 9517 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.386523938146668e-06, - "loss": 0.6323, - "step": 9518 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.384555568010294e-06, - "loss": 0.5138, - "step": 9519 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.382587262139066e-06, - "loss": 0.6821, - "step": 9520 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.380619020611287e-06, - "loss": 0.7076, - "step": 9521 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.378650843505258e-06, - "loss": 0.5891, - "step": 9522 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.376682730899273e-06, - "loss": 0.6424, - "step": 9523 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.374714682871633e-06, - "loss": 0.6212, - "step": 9524 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.372746699500626e-06, - "loss": 0.6702, - "step": 9525 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.37077878086454e-06, - "loss": 0.6108, - "step": 9526 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.368810927041668e-06, - "loss": 0.5686, - "step": 9527 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.366843138110294e-06, - "loss": 0.7305, - "step": 9528 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.364875414148696e-06, - "loss": 0.6363, - "step": 9529 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.362907755235155e-06, - "loss": 0.6205, - "step": 9530 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.360940161447945e-06, - "loss": 0.6986, - "step": 9531 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.358972632865348e-06, - "loss": 0.6385, - "step": 9532 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.357005169565628e-06, - "loss": 0.5915, - "step": 9533 - }, - { - "epoch": 1.72, - "grad_norm": 0.0, - "learning_rate": 8.355037771627059e-06, - "loss": 0.5534, - "step": 9534 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.353070439127907e-06, - "loss": 0.6705, - "step": 9535 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.351103172146428e-06, - "loss": 0.5861, - "step": 9536 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.349135970760893e-06, - "loss": 0.7219, - "step": 9537 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.347168835049557e-06, - "loss": 0.7545, - "step": 9538 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.345201765090677e-06, - "loss": 0.6353, - "step": 9539 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.343234760962504e-06, - "loss": 0.6275, - "step": 9540 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.34126782274329e-06, - "loss": 0.5407, - "step": 9541 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.339300950511286e-06, - "loss": 0.6157, - "step": 9542 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.337334144344736e-06, - "loss": 0.6926, - "step": 9543 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.33536740432188e-06, - "loss": 0.5866, - "step": 9544 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.333400730520959e-06, - "loss": 0.6078, - "step": 9545 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.331434123020213e-06, - "loss": 0.5895, - "step": 9546 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.329467581897876e-06, - "loss": 0.5906, - "step": 9547 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.32750110723218e-06, - "loss": 0.5713, - "step": 9548 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.325534699101357e-06, - "loss": 0.6049, - "step": 9549 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.323568357583632e-06, - "loss": 0.5779, - "step": 9550 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.32160208275723e-06, - "loss": 0.6875, - "step": 9551 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.31963587470037e-06, - "loss": 0.5704, - "step": 9552 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.317669733491274e-06, - "loss": 0.5496, - "step": 9553 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.315703659208157e-06, - "loss": 0.4894, - "step": 9554 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.313737651929238e-06, - "loss": 0.5933, - "step": 9555 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.311771711732718e-06, - "loss": 0.603, - "step": 9556 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.309805838696815e-06, - "loss": 0.6349, - "step": 9557 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.307840032899733e-06, - "loss": 0.7172, - "step": 9558 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.30587429441967e-06, - "loss": 0.5829, - "step": 9559 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.303908623334833e-06, - "loss": 0.6006, - "step": 9560 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.301943019723413e-06, - "loss": 0.6401, - "step": 9561 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.29997748366361e-06, - "loss": 0.5333, - "step": 9562 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.298012015233615e-06, - "loss": 0.6737, - "step": 9563 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.296046614511616e-06, - "loss": 0.6479, - "step": 9564 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.294081281575807e-06, - "loss": 0.6925, - "step": 9565 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.292116016504363e-06, - "loss": 0.6695, - "step": 9566 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.29015081937547e-06, - "loss": 0.6777, - "step": 9567 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.288185690267307e-06, - "loss": 0.6197, - "step": 9568 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.286220629258047e-06, - "loss": 0.5686, - "step": 9569 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.28425563642587e-06, - "loss": 0.724, - "step": 9570 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.282290711848941e-06, - "loss": 0.7472, - "step": 9571 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.280325855605428e-06, - "loss": 0.5937, - "step": 9572 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.278361067773507e-06, - "loss": 0.6341, - "step": 9573 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.276396348431324e-06, - "loss": 0.5984, - "step": 9574 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.27443169765705e-06, - "loss": 0.5837, - "step": 9575 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.272467115528838e-06, - "loss": 0.6009, - "step": 9576 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.270502602124843e-06, - "loss": 0.6256, - "step": 9577 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.268538157523218e-06, - "loss": 0.6476, - "step": 9578 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.266573781802113e-06, - "loss": 0.6799, - "step": 9579 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.26460947503967e-06, - "loss": 0.6755, - "step": 9580 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.262645237314038e-06, - "loss": 0.6781, - "step": 9581 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.26068106870335e-06, - "loss": 0.6574, - "step": 9582 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.258716969285753e-06, - "loss": 0.5986, - "step": 9583 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.256752939139377e-06, - "loss": 0.6349, - "step": 9584 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.254788978342354e-06, - "loss": 0.5459, - "step": 9585 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.252825086972815e-06, - "loss": 0.6522, - "step": 9586 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.250861265108887e-06, - "loss": 0.6773, - "step": 9587 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.248897512828698e-06, - "loss": 0.5688, - "step": 9588 - }, - { - "epoch": 1.73, - "grad_norm": 0.0, - "learning_rate": 8.246933830210362e-06, - "loss": 0.6574, - "step": 9589 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.244970217332004e-06, - "loss": 0.6531, - "step": 9590 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.243006674271735e-06, - "loss": 0.6919, - "step": 9591 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.24104320110767e-06, - "loss": 0.6354, - "step": 9592 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.239079797917921e-06, - "loss": 0.6319, - "step": 9593 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.237116464780592e-06, - "loss": 0.5582, - "step": 9594 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.235153201773794e-06, - "loss": 0.6491, - "step": 9595 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.233190008975624e-06, - "loss": 0.6007, - "step": 9596 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.231226886464182e-06, - "loss": 0.7538, - "step": 9597 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.229263834317565e-06, - "loss": 0.7019, - "step": 9598 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.227300852613863e-06, - "loss": 0.6909, - "step": 9599 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.225337941431171e-06, - "loss": 0.5706, - "step": 9600 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.223375100847577e-06, - "loss": 0.683, - "step": 9601 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.221412330941163e-06, - "loss": 0.66, - "step": 9602 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.219449631790019e-06, - "loss": 0.6881, - "step": 9603 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.217487003472213e-06, - "loss": 0.6256, - "step": 9604 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.215524446065831e-06, - "loss": 0.5993, - "step": 9605 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.213561959648941e-06, - "loss": 0.6869, - "step": 9606 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.211599544299616e-06, - "loss": 0.6607, - "step": 9607 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.209637200095926e-06, - "loss": 0.6706, - "step": 9608 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.207674927115936e-06, - "loss": 0.5931, - "step": 9609 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.205712725437706e-06, - "loss": 0.5309, - "step": 9610 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.203750595139304e-06, - "loss": 0.6473, - "step": 9611 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.201788536298774e-06, - "loss": 0.5784, - "step": 9612 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.199826548994178e-06, - "loss": 0.666, - "step": 9613 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.197864633303566e-06, - "loss": 0.7536, - "step": 9614 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.195902789304983e-06, - "loss": 0.6172, - "step": 9615 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.193941017076482e-06, - "loss": 0.5847, - "step": 9616 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.1919793166961e-06, - "loss": 0.7375, - "step": 9617 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.19001768824188e-06, - "loss": 0.5884, - "step": 9618 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.188056131791855e-06, - "loss": 0.6094, - "step": 9619 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.18609464742406e-06, - "loss": 0.6264, - "step": 9620 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.18413323521653e-06, - "loss": 0.6425, - "step": 9621 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.182171895247289e-06, - "loss": 0.6445, - "step": 9622 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.180210627594362e-06, - "loss": 0.7398, - "step": 9623 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.178249432335775e-06, - "loss": 0.5827, - "step": 9624 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.176288309549548e-06, - "loss": 0.6205, - "step": 9625 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.174327259313698e-06, - "loss": 0.6758, - "step": 9626 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.172366281706235e-06, - "loss": 0.6293, - "step": 9627 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.17040537680517e-06, - "loss": 0.5685, - "step": 9628 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.168444544688516e-06, - "loss": 0.5776, - "step": 9629 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.166483785434274e-06, - "loss": 0.5204, - "step": 9630 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.16452309912045e-06, - "loss": 0.5848, - "step": 9631 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.162562485825039e-06, - "loss": 0.5648, - "step": 9632 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.160601945626041e-06, - "loss": 0.6279, - "step": 9633 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.158641478601453e-06, - "loss": 0.7184, - "step": 9634 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.156681084829256e-06, - "loss": 0.6081, - "step": 9635 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.154720764387446e-06, - "loss": 0.6139, - "step": 9636 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.152760517354002e-06, - "loss": 0.5391, - "step": 9637 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.15080034380691e-06, - "loss": 0.6157, - "step": 9638 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.148840243824151e-06, - "loss": 0.6104, - "step": 9639 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.146880217483695e-06, - "loss": 0.6085, - "step": 9640 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.144920264863523e-06, - "loss": 0.6382, - "step": 9641 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.142960386041597e-06, - "loss": 0.6554, - "step": 9642 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.14100058109589e-06, - "loss": 0.6337, - "step": 9643 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.139040850104362e-06, - "loss": 0.6559, - "step": 9644 - }, - { - "epoch": 1.74, - "grad_norm": 0.0, - "learning_rate": 8.137081193144978e-06, - "loss": 0.5142, - "step": 9645 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.135121610295695e-06, - "loss": 0.604, - "step": 9646 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.133162101634472e-06, - "loss": 0.6438, - "step": 9647 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.131202667239254e-06, - "loss": 0.5964, - "step": 9648 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.129243307188002e-06, - "loss": 0.6714, - "step": 9649 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.127284021558651e-06, - "loss": 0.5699, - "step": 9650 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.125324810429151e-06, - "loss": 0.6335, - "step": 9651 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.123365673877441e-06, - "loss": 0.5738, - "step": 9652 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.121406611981456e-06, - "loss": 0.6733, - "step": 9653 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.11944762481914e-06, - "loss": 0.6936, - "step": 9654 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.117488712468416e-06, - "loss": 0.5886, - "step": 9655 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.11552987500722e-06, - "loss": 0.6505, - "step": 9656 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.11357111251347e-06, - "loss": 0.5246, - "step": 9657 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.111612425065094e-06, - "loss": 0.6939, - "step": 9658 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.10965381274001e-06, - "loss": 0.6912, - "step": 9659 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.107695275616135e-06, - "loss": 0.545, - "step": 9660 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.105736813771383e-06, - "loss": 0.616, - "step": 9661 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.103778427283668e-06, - "loss": 0.6537, - "step": 9662 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.101820116230893e-06, - "loss": 0.6108, - "step": 9663 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.09986188069097e-06, - "loss": 0.596, - "step": 9664 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.097903720741794e-06, - "loss": 0.5413, - "step": 9665 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.095945636461264e-06, - "loss": 0.5923, - "step": 9666 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.093987627927283e-06, - "loss": 0.6212, - "step": 9667 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.092029695217735e-06, - "loss": 0.5652, - "step": 9668 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.090071838410514e-06, - "loss": 0.645, - "step": 9669 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.08811405758351e-06, - "loss": 0.6414, - "step": 9670 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.086156352814605e-06, - "loss": 0.6428, - "step": 9671 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.084198724181678e-06, - "loss": 0.5837, - "step": 9672 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.082241171762607e-06, - "loss": 0.6812, - "step": 9673 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.080283695635267e-06, - "loss": 0.5449, - "step": 9674 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.078326295877529e-06, - "loss": 0.5365, - "step": 9675 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.076368972567267e-06, - "loss": 0.6293, - "step": 9676 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.074411725782341e-06, - "loss": 0.5468, - "step": 9677 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.072454555600611e-06, - "loss": 0.6604, - "step": 9678 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.070497462099948e-06, - "loss": 0.6599, - "step": 9679 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.068540445358198e-06, - "loss": 0.6824, - "step": 9680 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.066583505453217e-06, - "loss": 0.6136, - "step": 9681 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.064626642462857e-06, - "loss": 0.6126, - "step": 9682 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.062669856464962e-06, - "loss": 0.7593, - "step": 9683 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.060713147537382e-06, - "loss": 0.5944, - "step": 9684 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.058756515757956e-06, - "loss": 0.6031, - "step": 9685 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.056799961204518e-06, - "loss": 0.6996, - "step": 9686 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.054843483954913e-06, - "loss": 0.5555, - "step": 9687 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.05288708408696e-06, - "loss": 0.6597, - "step": 9688 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.050930761678496e-06, - "loss": 0.6753, - "step": 9689 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.048974516807347e-06, - "loss": 0.6937, - "step": 9690 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.04701834955133e-06, - "loss": 0.6088, - "step": 9691 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.04506225998827e-06, - "loss": 0.684, - "step": 9692 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.043106248195984e-06, - "loss": 0.6468, - "step": 9693 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.041150314252286e-06, - "loss": 0.6005, - "step": 9694 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.039194458234983e-06, - "loss": 0.6405, - "step": 9695 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.037238680221879e-06, - "loss": 0.5999, - "step": 9696 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.035282980290786e-06, - "loss": 0.5481, - "step": 9697 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.033327358519502e-06, - "loss": 0.6312, - "step": 9698 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.031371814985823e-06, - "loss": 0.6527, - "step": 9699 - }, - { - "epoch": 1.75, - "grad_norm": 0.0, - "learning_rate": 8.029416349767548e-06, - "loss": 0.5187, - "step": 9700 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.027460962942463e-06, - "loss": 0.7546, - "step": 9701 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.025505654588366e-06, - "loss": 0.551, - "step": 9702 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.023550424783034e-06, - "loss": 0.6436, - "step": 9703 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.021595273604248e-06, - "loss": 0.516, - "step": 9704 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.019640201129794e-06, - "loss": 0.5074, - "step": 9705 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.017685207437446e-06, - "loss": 0.6908, - "step": 9706 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.015730292604972e-06, - "loss": 0.5521, - "step": 9707 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.013775456710151e-06, - "loss": 0.5556, - "step": 9708 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.011820699830747e-06, - "loss": 0.6337, - "step": 9709 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.009866022044518e-06, - "loss": 0.6317, - "step": 9710 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.00791142342923e-06, - "loss": 0.6581, - "step": 9711 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.005956904062634e-06, - "loss": 0.6747, - "step": 9712 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.00400246402249e-06, - "loss": 0.5853, - "step": 9713 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.00204810338655e-06, - "loss": 0.6521, - "step": 9714 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 8.000093822232555e-06, - "loss": 0.6278, - "step": 9715 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.998139620638256e-06, - "loss": 0.6368, - "step": 9716 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.996185498681395e-06, - "loss": 0.6306, - "step": 9717 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.994231456439704e-06, - "loss": 0.7413, - "step": 9718 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.992277493990924e-06, - "loss": 0.5182, - "step": 9719 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.990323611412781e-06, - "loss": 0.5998, - "step": 9720 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.988369808783011e-06, - "loss": 0.5926, - "step": 9721 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.986416086179335e-06, - "loss": 0.5905, - "step": 9722 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.984462443679477e-06, - "loss": 0.4808, - "step": 9723 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.982508881361153e-06, - "loss": 0.5471, - "step": 9724 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.980555399302087e-06, - "loss": 0.6014, - "step": 9725 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.978601997579985e-06, - "loss": 0.5728, - "step": 9726 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.976648676272556e-06, - "loss": 0.6151, - "step": 9727 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.974695435457512e-06, - "loss": 0.5938, - "step": 9728 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.972742275212549e-06, - "loss": 0.6038, - "step": 9729 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.970789195615373e-06, - "loss": 0.574, - "step": 9730 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.968836196743679e-06, - "loss": 0.7643, - "step": 9731 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.966883278675164e-06, - "loss": 0.6422, - "step": 9732 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.964930441487514e-06, - "loss": 0.6744, - "step": 9733 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.962977685258413e-06, - "loss": 0.6154, - "step": 9734 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.96102501006555e-06, - "loss": 0.5835, - "step": 9735 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.959072415986608e-06, - "loss": 0.6907, - "step": 9736 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.95711990309926e-06, - "loss": 0.6055, - "step": 9737 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.955167471481182e-06, - "loss": 0.5881, - "step": 9738 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.953215121210045e-06, - "loss": 0.5553, - "step": 9739 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.95126285236352e-06, - "loss": 0.7306, - "step": 9740 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.949310665019266e-06, - "loss": 0.6366, - "step": 9741 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.947358559254945e-06, - "loss": 0.5953, - "step": 9742 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.945406535148218e-06, - "loss": 0.6599, - "step": 9743 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.94345459277674e-06, - "loss": 0.7025, - "step": 9744 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.941502732218157e-06, - "loss": 0.6368, - "step": 9745 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.939550953550126e-06, - "loss": 0.6835, - "step": 9746 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.937599256850289e-06, - "loss": 0.6165, - "step": 9747 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.935647642196285e-06, - "loss": 0.5281, - "step": 9748 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.933696109665754e-06, - "loss": 0.6001, - "step": 9749 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.931744659336329e-06, - "loss": 0.591, - "step": 9750 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.929793291285648e-06, - "loss": 0.6151, - "step": 9751 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.927842005591334e-06, - "loss": 0.6722, - "step": 9752 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.925890802331015e-06, - "loss": 0.5558, - "step": 9753 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.923939681582314e-06, - "loss": 0.7596, - "step": 9754 - }, - { - "epoch": 1.76, - "grad_norm": 0.0, - "learning_rate": 7.92198864342285e-06, - "loss": 0.5759, - "step": 9755 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.920037687930239e-06, - "loss": 0.691, - "step": 9756 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.918086815182089e-06, - "loss": 0.6007, - "step": 9757 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.91613602525601e-06, - "loss": 0.5994, - "step": 9758 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.914185318229614e-06, - "loss": 0.6511, - "step": 9759 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.912234694180497e-06, - "loss": 0.6389, - "step": 9760 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.91028415318626e-06, - "loss": 0.5932, - "step": 9761 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.908333695324504e-06, - "loss": 0.6414, - "step": 9762 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.90638332067281e-06, - "loss": 0.5615, - "step": 9763 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.904433029308779e-06, - "loss": 0.6618, - "step": 9764 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.90248282130999e-06, - "loss": 0.6936, - "step": 9765 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.900532696754026e-06, - "loss": 0.572, - "step": 9766 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.898582655718469e-06, - "loss": 0.5527, - "step": 9767 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.896632698280894e-06, - "loss": 0.5158, - "step": 9768 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.894682824518876e-06, - "loss": 0.6249, - "step": 9769 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.892733034509982e-06, - "loss": 0.5762, - "step": 9770 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.890783328331774e-06, - "loss": 0.627, - "step": 9771 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.88883370606182e-06, - "loss": 0.6995, - "step": 9772 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.88688416777768e-06, - "loss": 0.6992, - "step": 9773 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.884934713556904e-06, - "loss": 0.6223, - "step": 9774 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.882985343477049e-06, - "loss": 0.6207, - "step": 9775 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.881036057615665e-06, - "loss": 0.6453, - "step": 9776 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.879086856050298e-06, - "loss": 0.6317, - "step": 9777 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.877137738858489e-06, - "loss": 0.6267, - "step": 9778 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.875188706117777e-06, - "loss": 0.6425, - "step": 9779 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.873239757905695e-06, - "loss": 0.5981, - "step": 9780 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.87129089429978e-06, - "loss": 0.5921, - "step": 9781 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.869342115377562e-06, - "loss": 0.6619, - "step": 9782 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.867393421216562e-06, - "loss": 0.6063, - "step": 9783 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.865444811894307e-06, - "loss": 0.6009, - "step": 9784 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.863496287488316e-06, - "loss": 0.6569, - "step": 9785 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.861547848076102e-06, - "loss": 0.6887, - "step": 9786 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.859599493735177e-06, - "loss": 0.6628, - "step": 9787 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.857651224543047e-06, - "loss": 0.5708, - "step": 9788 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.855703040577227e-06, - "loss": 0.6334, - "step": 9789 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.85375494191521e-06, - "loss": 0.6948, - "step": 9790 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.851806928634498e-06, - "loss": 0.5931, - "step": 9791 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.849859000812588e-06, - "loss": 0.6878, - "step": 9792 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.847911158526973e-06, - "loss": 0.6265, - "step": 9793 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.845963401855135e-06, - "loss": 0.6109, - "step": 9794 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.844015730874565e-06, - "loss": 0.7305, - "step": 9795 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.84206814566274e-06, - "loss": 0.5526, - "step": 9796 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.840120646297143e-06, - "loss": 0.7292, - "step": 9797 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.838173232855246e-06, - "loss": 0.6158, - "step": 9798 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.836225905414518e-06, - "loss": 0.627, - "step": 9799 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.83427866405244e-06, - "loss": 0.5784, - "step": 9800 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.832331508846459e-06, - "loss": 0.497, - "step": 9801 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.830384439874045e-06, - "loss": 0.4855, - "step": 9802 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.828437457212655e-06, - "loss": 0.6606, - "step": 9803 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.82649056093974e-06, - "loss": 0.61, - "step": 9804 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.824543751132758e-06, - "loss": 0.6448, - "step": 9805 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.822597027869151e-06, - "loss": 0.6583, - "step": 9806 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.820650391226364e-06, - "loss": 0.5959, - "step": 9807 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.818703841281844e-06, - "loss": 0.6646, - "step": 9808 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.816757378113014e-06, - "loss": 0.6826, - "step": 9809 - }, - { - "epoch": 1.77, - "grad_norm": 0.0, - "learning_rate": 7.81481100179732e-06, - "loss": 0.6137, - "step": 9810 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.812864712412184e-06, - "loss": 0.5818, - "step": 9811 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.810918510035039e-06, - "loss": 0.7383, - "step": 9812 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.808972394743304e-06, - "loss": 0.5284, - "step": 9813 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.807026366614402e-06, - "loss": 0.6146, - "step": 9814 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.80508042572575e-06, - "loss": 0.6556, - "step": 9815 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.803134572154758e-06, - "loss": 0.6305, - "step": 9816 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.801188805978832e-06, - "loss": 0.6069, - "step": 9817 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.799243127275385e-06, - "loss": 0.5319, - "step": 9818 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.797297536121817e-06, - "loss": 0.5414, - "step": 9819 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.795352032595527e-06, - "loss": 0.5464, - "step": 9820 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.793406616773907e-06, - "loss": 0.6643, - "step": 9821 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.791461288734353e-06, - "loss": 0.6477, - "step": 9822 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.789516048554255e-06, - "loss": 0.6049, - "step": 9823 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.787570896310994e-06, - "loss": 0.581, - "step": 9824 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.785625832081952e-06, - "loss": 0.7091, - "step": 9825 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.783680855944506e-06, - "loss": 0.6428, - "step": 9826 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.781735967976034e-06, - "loss": 0.5507, - "step": 9827 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.779791168253908e-06, - "loss": 0.6576, - "step": 9828 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.777846456855487e-06, - "loss": 0.7107, - "step": 9829 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.775901833858143e-06, - "loss": 0.6314, - "step": 9830 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.773957299339239e-06, - "loss": 0.6865, - "step": 9831 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.772012853376124e-06, - "loss": 0.5318, - "step": 9832 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.770068496046154e-06, - "loss": 0.5245, - "step": 9833 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.768124227426676e-06, - "loss": 0.6659, - "step": 9834 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.766180047595043e-06, - "loss": 0.6474, - "step": 9835 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.764235956628596e-06, - "loss": 0.5892, - "step": 9836 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.762291954604668e-06, - "loss": 0.7063, - "step": 9837 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.760348041600607e-06, - "loss": 0.5822, - "step": 9838 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.758404217693731e-06, - "loss": 0.5233, - "step": 9839 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.756460482961376e-06, - "loss": 0.6843, - "step": 9840 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.754516837480869e-06, - "loss": 0.5825, - "step": 9841 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.752573281329526e-06, - "loss": 0.5977, - "step": 9842 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.75062981458467e-06, - "loss": 0.5657, - "step": 9843 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.748686437323613e-06, - "loss": 0.7223, - "step": 9844 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.746743149623663e-06, - "loss": 0.6836, - "step": 9845 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.744799951562139e-06, - "loss": 0.5778, - "step": 9846 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.742856843216328e-06, - "loss": 0.4852, - "step": 9847 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.740913824663544e-06, - "loss": 0.692, - "step": 9848 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.738970895981073e-06, - "loss": 0.5054, - "step": 9849 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.737028057246214e-06, - "loss": 0.639, - "step": 9850 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.73508530853626e-06, - "loss": 0.6448, - "step": 9851 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.733142649928488e-06, - "loss": 0.6695, - "step": 9852 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.731200081500189e-06, - "loss": 0.6175, - "step": 9853 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.729257603328636e-06, - "loss": 0.6074, - "step": 9854 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.727315215491102e-06, - "loss": 0.6059, - "step": 9855 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.725372918064863e-06, - "loss": 0.6685, - "step": 9856 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.723430711127189e-06, - "loss": 0.5403, - "step": 9857 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.72148859475534e-06, - "loss": 0.6743, - "step": 9858 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.719546569026575e-06, - "loss": 0.6437, - "step": 9859 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.717604634018157e-06, - "loss": 0.7005, - "step": 9860 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.71566278980734e-06, - "loss": 0.5859, - "step": 9861 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.713721036471366e-06, - "loss": 0.6071, - "step": 9862 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.711779374087487e-06, - "loss": 0.6357, - "step": 9863 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.709837802732943e-06, - "loss": 0.6145, - "step": 9864 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.707896322484974e-06, - "loss": 0.5076, - "step": 9865 - }, - { - "epoch": 1.78, - "grad_norm": 0.0, - "learning_rate": 7.705954933420819e-06, - "loss": 0.6618, - "step": 9866 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.704013635617701e-06, - "loss": 0.7481, - "step": 9867 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.702072429152863e-06, - "loss": 0.5698, - "step": 9868 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.700131314103514e-06, - "loss": 0.5491, - "step": 9869 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.698190290546883e-06, - "loss": 0.5501, - "step": 9870 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.696249358560185e-06, - "loss": 0.6142, - "step": 9871 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.694308518220633e-06, - "loss": 0.6425, - "step": 9872 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.692367769605438e-06, - "loss": 0.5873, - "step": 9873 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.690427112791807e-06, - "loss": 0.5782, - "step": 9874 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.688486547856942e-06, - "loss": 0.6026, - "step": 9875 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.686546074878045e-06, - "loss": 0.5404, - "step": 9876 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.684605693932306e-06, - "loss": 0.6317, - "step": 9877 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.682665405096917e-06, - "loss": 0.5777, - "step": 9878 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.68072520844907e-06, - "loss": 0.6865, - "step": 9879 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.678785104065945e-06, - "loss": 0.6624, - "step": 9880 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.676845092024728e-06, - "loss": 0.7674, - "step": 9881 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.674905172402591e-06, - "loss": 0.6428, - "step": 9882 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.67296534527671e-06, - "loss": 0.5662, - "step": 9883 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.671025610724258e-06, - "loss": 0.6866, - "step": 9884 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.669085968822392e-06, - "loss": 0.547, - "step": 9885 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.66714641964828e-06, - "loss": 0.6802, - "step": 9886 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.66520696327908e-06, - "loss": 0.659, - "step": 9887 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.663267599791946e-06, - "loss": 0.6048, - "step": 9888 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.66132832926403e-06, - "loss": 0.7036, - "step": 9889 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.659389151772481e-06, - "loss": 0.6768, - "step": 9890 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.657450067394443e-06, - "loss": 0.5898, - "step": 9891 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.655511076207053e-06, - "loss": 0.6032, - "step": 9892 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.653572178287444e-06, - "loss": 0.6249, - "step": 9893 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.651633373712759e-06, - "loss": 0.5956, - "step": 9894 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.64969466256012e-06, - "loss": 0.6393, - "step": 9895 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.64775604490665e-06, - "loss": 0.6486, - "step": 9896 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.645817520829479e-06, - "loss": 0.7354, - "step": 9897 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.643879090405718e-06, - "loss": 0.6451, - "step": 9898 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.641940753712486e-06, - "loss": 0.7085, - "step": 9899 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.640002510826887e-06, - "loss": 0.642, - "step": 9900 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.63806436182603e-06, - "loss": 0.601, - "step": 9901 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.63612630678702e-06, - "loss": 0.5441, - "step": 9902 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.634188345786956e-06, - "loss": 0.7312, - "step": 9903 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.632250478902933e-06, - "loss": 0.6846, - "step": 9904 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.63031270621204e-06, - "loss": 0.653, - "step": 9905 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.628375027791369e-06, - "loss": 0.6539, - "step": 9906 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.626437443718002e-06, - "loss": 0.5999, - "step": 9907 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.62449995406902e-06, - "loss": 0.5561, - "step": 9908 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.622562558921498e-06, - "loss": 0.6439, - "step": 9909 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.620625258352508e-06, - "loss": 0.5874, - "step": 9910 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.618688052439125e-06, - "loss": 0.6693, - "step": 9911 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.61675094125841e-06, - "loss": 0.6683, - "step": 9912 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.614813924887423e-06, - "loss": 0.588, - "step": 9913 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.612877003403231e-06, - "loss": 0.6416, - "step": 9914 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.610940176882874e-06, - "loss": 0.6175, - "step": 9915 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.609003445403411e-06, - "loss": 0.5841, - "step": 9916 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.607066809041887e-06, - "loss": 0.5653, - "step": 9917 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.605130267875345e-06, - "loss": 0.6265, - "step": 9918 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.603193821980823e-06, - "loss": 0.7039, - "step": 9919 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.601257471435358e-06, - "loss": 0.5465, - "step": 9920 - }, - { - "epoch": 1.79, - "grad_norm": 0.0, - "learning_rate": 7.599321216315982e-06, - "loss": 0.6854, - "step": 9921 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.597385056699719e-06, - "loss": 0.6405, - "step": 9922 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.595448992663591e-06, - "loss": 0.7048, - "step": 9923 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.593513024284624e-06, - "loss": 0.5993, - "step": 9924 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.591577151639832e-06, - "loss": 0.5613, - "step": 9925 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.589641374806223e-06, - "loss": 0.6836, - "step": 9926 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.587705693860813e-06, - "loss": 0.5452, - "step": 9927 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.585770108880602e-06, - "loss": 0.6849, - "step": 9928 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.583834619942595e-06, - "loss": 0.6244, - "step": 9929 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.581899227123784e-06, - "loss": 0.6244, - "step": 9930 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.579963930501161e-06, - "loss": 0.5922, - "step": 9931 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5780287301517205e-06, - "loss": 0.5515, - "step": 9932 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.576093626152448e-06, - "loss": 0.6998, - "step": 9933 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.57415861858032e-06, - "loss": 0.6271, - "step": 9934 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.572223707512319e-06, - "loss": 0.5794, - "step": 9935 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.570288893025419e-06, - "loss": 0.5785, - "step": 9936 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.568354175196592e-06, - "loss": 0.5133, - "step": 9937 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.566419554102798e-06, - "loss": 0.5984, - "step": 9938 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.564485029821003e-06, - "loss": 0.7117, - "step": 9939 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.562550602428166e-06, - "loss": 0.6045, - "step": 9940 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.560616272001242e-06, - "loss": 0.5442, - "step": 9941 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.55868203861718e-06, - "loss": 0.6665, - "step": 9942 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.55674790235293e-06, - "loss": 0.5188, - "step": 9943 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5548138632854375e-06, - "loss": 0.6294, - "step": 9944 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5528799214916345e-06, - "loss": 0.7062, - "step": 9945 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5509460770484615e-06, - "loss": 0.6525, - "step": 9946 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.549012330032847e-06, - "loss": 0.6714, - "step": 9947 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.547078680521722e-06, - "loss": 0.628, - "step": 9948 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.545145128592009e-06, - "loss": 0.5864, - "step": 9949 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.54321167432063e-06, - "loss": 0.5928, - "step": 9950 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.541278317784495e-06, - "loss": 0.577, - "step": 9951 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5393450590605265e-06, - "loss": 0.6367, - "step": 9952 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.537411898225624e-06, - "loss": 0.6443, - "step": 9953 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.535478835356695e-06, - "loss": 0.7125, - "step": 9954 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.53354587053064e-06, - "loss": 0.7029, - "step": 9955 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.531613003824353e-06, - "loss": 0.5812, - "step": 9956 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.529680235314732e-06, - "loss": 0.5836, - "step": 9957 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.527747565078663e-06, - "loss": 0.5945, - "step": 9958 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5258149931930325e-06, - "loss": 0.5695, - "step": 9959 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.523882519734718e-06, - "loss": 0.555, - "step": 9960 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.521950144780597e-06, - "loss": 0.5634, - "step": 9961 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5200178684075465e-06, - "loss": 0.566, - "step": 9962 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.518085690692434e-06, - "loss": 0.622, - "step": 9963 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5161536117121215e-06, - "loss": 0.6218, - "step": 9964 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.514221631543477e-06, - "loss": 0.6902, - "step": 9965 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.512289750263353e-06, - "loss": 0.6242, - "step": 9966 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.510357967948607e-06, - "loss": 0.6095, - "step": 9967 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.508426284676084e-06, - "loss": 0.6912, - "step": 9968 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5064947005226285e-06, - "loss": 0.7408, - "step": 9969 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.504563215565088e-06, - "loss": 0.6647, - "step": 9970 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.5026318298802994e-06, - "loss": 0.6153, - "step": 9971 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.500700543545091e-06, - "loss": 0.6122, - "step": 9972 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.4987693566363e-06, - "loss": 0.6678, - "step": 9973 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.4968382692307485e-06, - "loss": 0.6159, - "step": 9974 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.494907281405259e-06, - "loss": 0.6169, - "step": 9975 - }, - { - "epoch": 1.8, - "grad_norm": 0.0, - "learning_rate": 7.492976393236649e-06, - "loss": 0.6049, - "step": 9976 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.49104560480173e-06, - "loss": 0.6281, - "step": 9977 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4891149161773165e-06, - "loss": 0.5281, - "step": 9978 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4871843274402145e-06, - "loss": 0.6176, - "step": 9979 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.485253838667221e-06, - "loss": 0.7829, - "step": 9980 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.483323449935141e-06, - "loss": 0.6417, - "step": 9981 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.481393161320768e-06, - "loss": 0.6395, - "step": 9982 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.479462972900886e-06, - "loss": 0.5877, - "step": 9983 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.477532884752287e-06, - "loss": 0.6465, - "step": 9984 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.475602896951748e-06, - "loss": 0.6417, - "step": 9985 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.473673009576051e-06, - "loss": 0.5803, - "step": 9986 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.47174322270197e-06, - "loss": 0.5761, - "step": 9987 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.469813536406274e-06, - "loss": 0.6414, - "step": 9988 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.467883950765732e-06, - "loss": 0.6958, - "step": 9989 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4659544658571055e-06, - "loss": 0.5506, - "step": 9990 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.464025081757149e-06, - "loss": 0.643, - "step": 9991 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.46209579854262e-06, - "loss": 0.49, - "step": 9992 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.460166616290266e-06, - "loss": 0.6777, - "step": 9993 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4582375350768355e-06, - "loss": 0.661, - "step": 9994 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4563085549790705e-06, - "loss": 0.7214, - "step": 9995 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.454379676073709e-06, - "loss": 0.586, - "step": 9996 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.452450898437489e-06, - "loss": 0.5903, - "step": 9997 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.450522222147129e-06, - "loss": 0.5766, - "step": 9998 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.448593647279367e-06, - "loss": 0.5383, - "step": 9999 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.446665173910919e-06, - "loss": 0.6156, - "step": 10000 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.444736802118505e-06, - "loss": 0.663, - "step": 10001 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.442808531978837e-06, - "loss": 0.5641, - "step": 10002 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.440880363568628e-06, - "loss": 0.5945, - "step": 10003 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.438952296964581e-06, - "loss": 0.6289, - "step": 10004 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.437024332243403e-06, - "loss": 0.584, - "step": 10005 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.435096469481784e-06, - "loss": 0.6291, - "step": 10006 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.43316870875642e-06, - "loss": 0.7108, - "step": 10007 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.431241050144005e-06, - "loss": 0.5507, - "step": 10008 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.42931349372122e-06, - "loss": 0.5564, - "step": 10009 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.427386039564746e-06, - "loss": 0.5955, - "step": 10010 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.425458687751266e-06, - "loss": 0.6633, - "step": 10011 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.42353143835745e-06, - "loss": 0.6297, - "step": 10012 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.421604291459966e-06, - "loss": 0.5137, - "step": 10013 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.419677247135481e-06, - "loss": 0.6572, - "step": 10014 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.417750305460651e-06, - "loss": 0.54, - "step": 10015 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.415823466512142e-06, - "loss": 0.6235, - "step": 10016 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.413896730366602e-06, - "loss": 0.6643, - "step": 10017 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.411970097100678e-06, - "loss": 0.6087, - "step": 10018 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.410043566791019e-06, - "loss": 0.6439, - "step": 10019 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.408117139514266e-06, - "loss": 0.5386, - "step": 10020 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.406190815347052e-06, - "loss": 0.5998, - "step": 10021 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4042645943660085e-06, - "loss": 0.5425, - "step": 10022 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.4023384766477656e-06, - "loss": 0.6334, - "step": 10023 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.400412462268951e-06, - "loss": 0.6647, - "step": 10024 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.398486551306181e-06, - "loss": 0.5143, - "step": 10025 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.39656074383607e-06, - "loss": 0.6001, - "step": 10026 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.3946350399352405e-06, - "loss": 0.6507, - "step": 10027 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.392709439680285e-06, - "loss": 0.674, - "step": 10028 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.390783943147817e-06, - "loss": 0.5871, - "step": 10029 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.388858550414433e-06, - "loss": 0.6881, - "step": 10030 - }, - { - "epoch": 1.81, - "grad_norm": 0.0, - "learning_rate": 7.386933261556727e-06, - "loss": 0.6375, - "step": 10031 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.385008076651294e-06, - "loss": 0.5997, - "step": 10032 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.383082995774721e-06, - "loss": 0.5516, - "step": 10033 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3811580190035855e-06, - "loss": 0.737, - "step": 10034 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.379233146414477e-06, - "loss": 0.6367, - "step": 10035 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.377308378083958e-06, - "loss": 0.7387, - "step": 10036 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.375383714088608e-06, - "loss": 0.6181, - "step": 10037 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.373459154504987e-06, - "loss": 0.7058, - "step": 10038 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.371534699409662e-06, - "loss": 0.7154, - "step": 10039 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.369610348879188e-06, - "loss": 0.6071, - "step": 10040 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.367686102990122e-06, - "loss": 0.6441, - "step": 10041 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.365761961819012e-06, - "loss": 0.5827, - "step": 10042 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.363837925442405e-06, - "loss": 0.5277, - "step": 10043 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.361913993936839e-06, - "loss": 0.537, - "step": 10044 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.359990167378854e-06, - "loss": 0.5591, - "step": 10045 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.358066445844985e-06, - "loss": 0.6026, - "step": 10046 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.356142829411756e-06, - "loss": 0.6643, - "step": 10047 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.354219318155695e-06, - "loss": 0.5992, - "step": 10048 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.352295912153323e-06, - "loss": 0.677, - "step": 10049 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.350372611481156e-06, - "loss": 0.6484, - "step": 10050 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.348449416215705e-06, - "loss": 0.6764, - "step": 10051 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.346526326433478e-06, - "loss": 0.616, - "step": 10052 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.344603342210978e-06, - "loss": 0.6056, - "step": 10053 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.342680463624707e-06, - "loss": 0.6484, - "step": 10054 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.34075769075116e-06, - "loss": 0.5769, - "step": 10055 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.338835023666826e-06, - "loss": 0.7274, - "step": 10056 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3369124624481945e-06, - "loss": 0.6088, - "step": 10057 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.33499000717175e-06, - "loss": 0.5532, - "step": 10058 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.333067657913967e-06, - "loss": 0.6073, - "step": 10059 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3311454147513205e-06, - "loss": 0.5797, - "step": 10060 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.32922327776028e-06, - "loss": 0.6043, - "step": 10061 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.327301247017314e-06, - "loss": 0.6595, - "step": 10062 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.325379322598885e-06, - "loss": 0.6149, - "step": 10063 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3234575045814435e-06, - "loss": 0.6456, - "step": 10064 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.321535793041455e-06, - "loss": 0.6546, - "step": 10065 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.319614188055355e-06, - "loss": 0.6502, - "step": 10066 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.317692689699596e-06, - "loss": 0.5859, - "step": 10067 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.315771298050618e-06, - "loss": 0.6328, - "step": 10068 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.313850013184853e-06, - "loss": 0.6231, - "step": 10069 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.311928835178739e-06, - "loss": 0.5184, - "step": 10070 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3100077641087e-06, - "loss": 0.6901, - "step": 10071 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.30808680005116e-06, - "loss": 0.6299, - "step": 10072 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.306165943082543e-06, - "loss": 0.5735, - "step": 10073 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3042451932792534e-06, - "loss": 0.5721, - "step": 10074 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.3023245507177095e-06, - "loss": 0.6057, - "step": 10075 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.300404015474318e-06, - "loss": 0.6765, - "step": 10076 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.2984835876254775e-06, - "loss": 0.6289, - "step": 10077 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.29656326724759e-06, - "loss": 0.6189, - "step": 10078 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.294643054417045e-06, - "loss": 0.6135, - "step": 10079 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.2927229492102384e-06, - "loss": 0.5892, - "step": 10080 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.290802951703546e-06, - "loss": 0.6238, - "step": 10081 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.288883061973353e-06, - "loss": 0.6501, - "step": 10082 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.286963280096038e-06, - "loss": 0.6367, - "step": 10083 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.2850436061479704e-06, - "loss": 0.6447, - "step": 10084 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.283124040205518e-06, - "loss": 0.608, - "step": 10085 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.281204582345047e-06, - "loss": 0.5835, - "step": 10086 - }, - { - "epoch": 1.82, - "grad_norm": 0.0, - "learning_rate": 7.279285232642916e-06, - "loss": 0.6345, - "step": 10087 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.277365991175482e-06, - "loss": 0.655, - "step": 10088 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.27544685801909e-06, - "loss": 0.6182, - "step": 10089 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.273527833250087e-06, - "loss": 0.6176, - "step": 10090 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.27160891694482e-06, - "loss": 0.6118, - "step": 10091 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.269690109179625e-06, - "loss": 0.6882, - "step": 10092 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.267771410030832e-06, - "loss": 0.5902, - "step": 10093 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.265852819574775e-06, - "loss": 0.7526, - "step": 10094 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.263934337887776e-06, - "loss": 0.6904, - "step": 10095 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2620159650461585e-06, - "loss": 0.6071, - "step": 10096 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.260097701126234e-06, - "loss": 0.5755, - "step": 10097 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.258179546204318e-06, - "loss": 0.6516, - "step": 10098 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2562615003567135e-06, - "loss": 0.5836, - "step": 10099 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.254343563659729e-06, - "loss": 0.6155, - "step": 10100 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.252425736189661e-06, - "loss": 0.6356, - "step": 10101 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2505080180228025e-06, - "loss": 0.6232, - "step": 10102 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.248590409235452e-06, - "loss": 0.6304, - "step": 10103 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.24667290990388e-06, - "loss": 0.6374, - "step": 10104 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.244755520104381e-06, - "loss": 0.6237, - "step": 10105 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.242838239913226e-06, - "loss": 0.6796, - "step": 10106 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.240921069406688e-06, - "loss": 0.6164, - "step": 10107 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.239004008661037e-06, - "loss": 0.6288, - "step": 10108 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.237087057752537e-06, - "loss": 0.5628, - "step": 10109 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.235170216757446e-06, - "loss": 0.6166, - "step": 10110 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.233253485752025e-06, - "loss": 0.631, - "step": 10111 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.231336864812513e-06, - "loss": 0.6105, - "step": 10112 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.229420354015168e-06, - "loss": 0.5924, - "step": 10113 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2275039534362254e-06, - "loss": 0.5903, - "step": 10114 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.225587663151925e-06, - "loss": 0.6282, - "step": 10115 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.223671483238502e-06, - "loss": 0.5943, - "step": 10116 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.221755413772182e-06, - "loss": 0.6428, - "step": 10117 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.219839454829194e-06, - "loss": 0.5812, - "step": 10118 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2179236064857525e-06, - "loss": 0.692, - "step": 10119 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.216007868818076e-06, - "loss": 0.5651, - "step": 10120 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.214092241902375e-06, - "loss": 0.5709, - "step": 10121 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.212176725814859e-06, - "loss": 0.6264, - "step": 10122 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.210261320631725e-06, - "loss": 0.7219, - "step": 10123 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.208346026429178e-06, - "loss": 0.6523, - "step": 10124 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.206430843283407e-06, - "loss": 0.657, - "step": 10125 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.2045157712706075e-06, - "loss": 0.6852, - "step": 10126 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.202600810466955e-06, - "loss": 0.6043, - "step": 10127 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.200685960948633e-06, - "loss": 0.7257, - "step": 10128 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.198771222791821e-06, - "loss": 0.5217, - "step": 10129 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.19685659607269e-06, - "loss": 0.7045, - "step": 10130 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.194942080867402e-06, - "loss": 0.6169, - "step": 10131 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.193027677252126e-06, - "loss": 0.5953, - "step": 10132 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.191113385303019e-06, - "loss": 0.6941, - "step": 10133 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.189199205096232e-06, - "loss": 0.6317, - "step": 10134 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.1872851367079155e-06, - "loss": 0.6485, - "step": 10135 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.185371180214214e-06, - "loss": 0.5984, - "step": 10136 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.1834573356912685e-06, - "loss": 0.5535, - "step": 10137 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.181543603215217e-06, - "loss": 0.5755, - "step": 10138 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.1796299828621884e-06, - "loss": 0.6723, - "step": 10139 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.177716474708309e-06, - "loss": 0.5906, - "step": 10140 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.175803078829706e-06, - "loss": 0.5269, - "step": 10141 - }, - { - "epoch": 1.83, - "grad_norm": 0.0, - "learning_rate": 7.173889795302494e-06, - "loss": 0.5657, - "step": 10142 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1719766242027865e-06, - "loss": 0.679, - "step": 10143 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.170063565606694e-06, - "loss": 0.6928, - "step": 10144 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.168150619590318e-06, - "loss": 0.5719, - "step": 10145 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.166237786229765e-06, - "loss": 0.7094, - "step": 10146 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1643250656011265e-06, - "loss": 0.5788, - "step": 10147 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.162412457780492e-06, - "loss": 0.6188, - "step": 10148 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.160499962843959e-06, - "loss": 0.6582, - "step": 10149 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1585875808675945e-06, - "loss": 0.737, - "step": 10150 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.156675311927485e-06, - "loss": 0.6352, - "step": 10151 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1547631560997045e-06, - "loss": 0.6801, - "step": 10152 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.152851113460315e-06, - "loss": 0.6988, - "step": 10153 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1509391840853905e-06, - "loss": 0.5789, - "step": 10154 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.149027368050984e-06, - "loss": 0.5945, - "step": 10155 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.147115665433155e-06, - "loss": 0.7389, - "step": 10156 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.145204076307951e-06, - "loss": 0.5854, - "step": 10157 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.143292600751417e-06, - "loss": 0.6009, - "step": 10158 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.141381238839598e-06, - "loss": 0.5928, - "step": 10159 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.139469990648532e-06, - "loss": 0.5789, - "step": 10160 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.137558856254248e-06, - "loss": 0.6376, - "step": 10161 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.135647835732777e-06, - "loss": 0.5824, - "step": 10162 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.133736929160143e-06, - "loss": 0.6529, - "step": 10163 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.131826136612367e-06, - "loss": 0.6396, - "step": 10164 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.129915458165457e-06, - "loss": 0.6945, - "step": 10165 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.128004893895426e-06, - "loss": 0.6725, - "step": 10166 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.126094443878282e-06, - "loss": 0.6411, - "step": 10167 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.124184108190024e-06, - "loss": 0.6237, - "step": 10168 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.122273886906648e-06, - "loss": 0.6779, - "step": 10169 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.120363780104147e-06, - "loss": 0.6053, - "step": 10170 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.11845378785851e-06, - "loss": 0.6413, - "step": 10171 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1165439102457164e-06, - "loss": 0.5863, - "step": 10172 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.1146341473417455e-06, - "loss": 0.708, - "step": 10173 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.112724499222569e-06, - "loss": 0.5965, - "step": 10174 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.110814965964159e-06, - "loss": 0.5803, - "step": 10175 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.108905547642482e-06, - "loss": 0.5738, - "step": 10176 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.106996244333491e-06, - "loss": 0.5815, - "step": 10177 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.105087056113147e-06, - "loss": 0.623, - "step": 10178 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.103177983057401e-06, - "loss": 0.6228, - "step": 10179 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.101269025242197e-06, - "loss": 0.685, - "step": 10180 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.0993601827434755e-06, - "loss": 0.4954, - "step": 10181 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.097451455637175e-06, - "loss": 0.6584, - "step": 10182 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.0955428439992276e-06, - "loss": 0.7076, - "step": 10183 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.093634347905562e-06, - "loss": 0.585, - "step": 10184 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.091725967432101e-06, - "loss": 0.6177, - "step": 10185 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.089817702654766e-06, - "loss": 0.6965, - "step": 10186 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.087909553649465e-06, - "loss": 0.6779, - "step": 10187 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.086001520492111e-06, - "loss": 0.5682, - "step": 10188 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.084093603258608e-06, - "loss": 0.6556, - "step": 10189 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.082185802024859e-06, - "loss": 0.6783, - "step": 10190 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.080278116866753e-06, - "loss": 0.6637, - "step": 10191 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.078370547860188e-06, - "loss": 0.5358, - "step": 10192 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.076463095081048e-06, - "loss": 0.7112, - "step": 10193 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.0745557586052176e-06, - "loss": 0.611, - "step": 10194 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.0726485385085666e-06, - "loss": 0.5598, - "step": 10195 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.0707414348669705e-06, - "loss": 0.6287, - "step": 10196 - }, - { - "epoch": 1.84, - "grad_norm": 0.0, - "learning_rate": 7.068834447756299e-06, - "loss": 0.4913, - "step": 10197 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.066927577252416e-06, - "loss": 0.5801, - "step": 10198 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0650208234311744e-06, - "loss": 0.6008, - "step": 10199 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.063114186368437e-06, - "loss": 0.5021, - "step": 10200 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0612076661400475e-06, - "loss": 0.6518, - "step": 10201 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.059301262821854e-06, - "loss": 0.6838, - "step": 10202 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.057394976489691e-06, - "loss": 0.5289, - "step": 10203 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.055488807219395e-06, - "loss": 0.6511, - "step": 10204 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0535827550868005e-06, - "loss": 0.5531, - "step": 10205 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.051676820167732e-06, - "loss": 0.6917, - "step": 10206 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0497710025380085e-06, - "loss": 0.6161, - "step": 10207 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.047865302273449e-06, - "loss": 0.5714, - "step": 10208 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.045959719449869e-06, - "loss": 0.6045, - "step": 10209 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.044054254143069e-06, - "loss": 0.6801, - "step": 10210 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.042148906428854e-06, - "loss": 0.6356, - "step": 10211 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.04024367638302e-06, - "loss": 0.6176, - "step": 10212 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.038338564081367e-06, - "loss": 0.7325, - "step": 10213 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.036433569599679e-06, - "loss": 0.5149, - "step": 10214 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.034528693013738e-06, - "loss": 0.5547, - "step": 10215 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.032623934399328e-06, - "loss": 0.5304, - "step": 10216 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0307192938322224e-06, - "loss": 0.5903, - "step": 10217 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.028814771388188e-06, - "loss": 0.6175, - "step": 10218 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.026910367142994e-06, - "loss": 0.6097, - "step": 10219 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.025006081172394e-06, - "loss": 0.6377, - "step": 10220 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.023101913552153e-06, - "loss": 0.671, - "step": 10221 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.021197864358016e-06, - "loss": 0.5794, - "step": 10222 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0192939336657296e-06, - "loss": 0.6947, - "step": 10223 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.017390121551042e-06, - "loss": 0.6493, - "step": 10224 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.015486428089679e-06, - "loss": 0.6091, - "step": 10225 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.0135828533573814e-06, - "loss": 0.493, - "step": 10226 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.011679397429873e-06, - "loss": 0.5806, - "step": 10227 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.009776060382877e-06, - "loss": 0.5775, - "step": 10228 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.007872842292113e-06, - "loss": 0.5647, - "step": 10229 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.005969743233293e-06, - "loss": 0.6216, - "step": 10230 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.004066763282126e-06, - "loss": 0.6763, - "step": 10231 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.002163902514316e-06, - "loss": 0.5229, - "step": 10232 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 7.00026116100556e-06, - "loss": 0.7026, - "step": 10233 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.998358538831553e-06, - "loss": 0.537, - "step": 10234 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.996456036067988e-06, - "loss": 0.6258, - "step": 10235 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.994553652790546e-06, - "loss": 0.6263, - "step": 10236 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.992651389074907e-06, - "loss": 0.576, - "step": 10237 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.990749244996749e-06, - "loss": 0.6121, - "step": 10238 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.988847220631742e-06, - "loss": 0.6801, - "step": 10239 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.986945316055551e-06, - "loss": 0.5057, - "step": 10240 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.985043531343836e-06, - "loss": 0.6616, - "step": 10241 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.983141866572253e-06, - "loss": 0.528, - "step": 10242 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.981240321816456e-06, - "loss": 0.6316, - "step": 10243 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.97933889715209e-06, - "loss": 0.5514, - "step": 10244 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.977437592654797e-06, - "loss": 0.6339, - "step": 10245 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.975536408400214e-06, - "loss": 0.6224, - "step": 10246 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.973635344463978e-06, - "loss": 0.6028, - "step": 10247 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.97173440092171e-06, - "loss": 0.7492, - "step": 10248 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.969833577849034e-06, - "loss": 0.5962, - "step": 10249 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.967932875321569e-06, - "loss": 0.5968, - "step": 10250 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.966032293414929e-06, - "loss": 0.5372, - "step": 10251 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.964131832204724e-06, - "loss": 0.6075, - "step": 10252 - }, - { - "epoch": 1.85, - "grad_norm": 0.0, - "learning_rate": 6.962231491766551e-06, - "loss": 0.6892, - "step": 10253 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.960331272176016e-06, - "loss": 0.6897, - "step": 10254 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.958431173508713e-06, - "loss": 0.6061, - "step": 10255 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.956531195840226e-06, - "loss": 0.663, - "step": 10256 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.954631339246141e-06, - "loss": 0.5888, - "step": 10257 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.952731603802037e-06, - "loss": 0.5672, - "step": 10258 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.950831989583492e-06, - "loss": 0.6997, - "step": 10259 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.948932496666074e-06, - "loss": 0.6173, - "step": 10260 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.947033125125347e-06, - "loss": 0.5465, - "step": 10261 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.9451338750368755e-06, - "loss": 0.6113, - "step": 10262 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.943234746476208e-06, - "loss": 0.5476, - "step": 10263 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.941335739518901e-06, - "loss": 0.681, - "step": 10264 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.939436854240498e-06, - "loss": 0.605, - "step": 10265 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.937538090716536e-06, - "loss": 0.7065, - "step": 10266 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.935639449022559e-06, - "loss": 0.681, - "step": 10267 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.933740929234094e-06, - "loss": 0.6317, - "step": 10268 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.9318425314266646e-06, - "loss": 0.6397, - "step": 10269 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.929944255675802e-06, - "loss": 0.6828, - "step": 10270 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.928046102057011e-06, - "loss": 0.6086, - "step": 10271 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.92614807064581e-06, - "loss": 0.6568, - "step": 10272 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.924250161517705e-06, - "loss": 0.6001, - "step": 10273 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.922352374748194e-06, - "loss": 0.5893, - "step": 10274 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.920454710412781e-06, - "loss": 0.5382, - "step": 10275 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.918557168586955e-06, - "loss": 0.6265, - "step": 10276 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.916659749346206e-06, - "loss": 0.6693, - "step": 10277 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.914762452766012e-06, - "loss": 0.6566, - "step": 10278 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.912865278921852e-06, - "loss": 0.6378, - "step": 10279 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.910968227889199e-06, - "loss": 0.6301, - "step": 10280 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.909071299743524e-06, - "loss": 0.5156, - "step": 10281 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.907174494560287e-06, - "loss": 0.7109, - "step": 10282 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.905277812414945e-06, - "loss": 0.5476, - "step": 10283 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.903381253382955e-06, - "loss": 0.5481, - "step": 10284 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.901484817539765e-06, - "loss": 0.7203, - "step": 10285 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.899588504960817e-06, - "loss": 0.6326, - "step": 10286 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.897692315721547e-06, - "loss": 0.5257, - "step": 10287 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.895796249897391e-06, - "loss": 0.6518, - "step": 10288 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.893900307563779e-06, - "loss": 0.5952, - "step": 10289 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.892004488796134e-06, - "loss": 0.6069, - "step": 10290 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.890108793669874e-06, - "loss": 0.6466, - "step": 10291 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.888213222260418e-06, - "loss": 0.5754, - "step": 10292 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.8863177746431664e-06, - "loss": 0.7038, - "step": 10293 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.884422450893529e-06, - "loss": 0.7483, - "step": 10294 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.8825272510869035e-06, - "loss": 0.6329, - "step": 10295 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.880632175298683e-06, - "loss": 0.6133, - "step": 10296 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.87873722360426e-06, - "loss": 0.6544, - "step": 10297 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.876842396079018e-06, - "loss": 0.5977, - "step": 10298 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.874947692798332e-06, - "loss": 0.61, - "step": 10299 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.873053113837587e-06, - "loss": 0.6459, - "step": 10300 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.871158659272141e-06, - "loss": 0.612, - "step": 10301 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.869264329177365e-06, - "loss": 0.6279, - "step": 10302 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.867370123628616e-06, - "loss": 0.6825, - "step": 10303 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.865476042701249e-06, - "loss": 0.5864, - "step": 10304 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.863582086470617e-06, - "loss": 0.6624, - "step": 10305 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.861688255012062e-06, - "loss": 0.5054, - "step": 10306 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.859794548400922e-06, - "loss": 0.5713, - "step": 10307 - }, - { - "epoch": 1.86, - "grad_norm": 0.0, - "learning_rate": 6.857900966712543e-06, - "loss": 0.6324, - "step": 10308 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.856007510022238e-06, - "loss": 0.5437, - "step": 10309 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.854114178405342e-06, - "loss": 0.625, - "step": 10310 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8522209719371735e-06, - "loss": 0.6148, - "step": 10311 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8503278906930445e-06, - "loss": 0.5829, - "step": 10312 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.848434934748272e-06, - "loss": 0.5669, - "step": 10313 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8465421041781554e-06, - "loss": 0.5768, - "step": 10314 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.844649399057998e-06, - "loss": 0.6422, - "step": 10315 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.84275681946309e-06, - "loss": 0.6853, - "step": 10316 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8408643654687246e-06, - "loss": 0.6028, - "step": 10317 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.838972037150187e-06, - "loss": 0.6869, - "step": 10318 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.837079834582756e-06, - "loss": 0.6333, - "step": 10319 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.83518775784171e-06, - "loss": 0.5255, - "step": 10320 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.833295807002313e-06, - "loss": 0.5936, - "step": 10321 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.831403982139836e-06, - "loss": 0.7362, - "step": 10322 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.829512283329539e-06, - "loss": 0.7304, - "step": 10323 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.827620710646672e-06, - "loss": 0.5768, - "step": 10324 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8257292641664886e-06, - "loss": 0.6578, - "step": 10325 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.823837943964231e-06, - "loss": 0.6549, - "step": 10326 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8219467501151425e-06, - "loss": 0.62, - "step": 10327 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.820055682694456e-06, - "loss": 0.5921, - "step": 10328 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.818164741777401e-06, - "loss": 0.5084, - "step": 10329 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.81627392743921e-06, - "loss": 0.5706, - "step": 10330 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.814383239755088e-06, - "loss": 0.7484, - "step": 10331 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.812492678800262e-06, - "loss": 0.561, - "step": 10332 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.810602244649937e-06, - "loss": 0.5801, - "step": 10333 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.808711937379315e-06, - "loss": 0.6243, - "step": 10334 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.806821757063603e-06, - "loss": 0.6418, - "step": 10335 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.804931703777991e-06, - "loss": 0.6287, - "step": 10336 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.8030417775976675e-06, - "loss": 0.6112, - "step": 10337 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.801151978597825e-06, - "loss": 0.7688, - "step": 10338 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.799262306853631e-06, - "loss": 0.6174, - "step": 10339 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.797372762440266e-06, - "loss": 0.5572, - "step": 10340 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.7954833454329e-06, - "loss": 0.5377, - "step": 10341 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.793594055906695e-06, - "loss": 0.5709, - "step": 10342 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.791704893936812e-06, - "loss": 0.5891, - "step": 10343 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.789815859598406e-06, - "loss": 0.5962, - "step": 10344 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.787926952966625e-06, - "loss": 0.6015, - "step": 10345 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.786038174116611e-06, - "loss": 0.6459, - "step": 10346 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.784149523123502e-06, - "loss": 0.6119, - "step": 10347 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.782261000062436e-06, - "loss": 0.6125, - "step": 10348 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.78037260500854e-06, - "loss": 0.5799, - "step": 10349 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.778484338036935e-06, - "loss": 0.5314, - "step": 10350 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.7765961992227425e-06, - "loss": 0.6177, - "step": 10351 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.774708188641077e-06, - "loss": 0.6139, - "step": 10352 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.772820306367047e-06, - "loss": 0.603, - "step": 10353 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.770932552475751e-06, - "loss": 0.5553, - "step": 10354 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.769044927042287e-06, - "loss": 0.6893, - "step": 10355 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.767157430141755e-06, - "loss": 0.6039, - "step": 10356 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.765270061849237e-06, - "loss": 0.5058, - "step": 10357 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.763382822239815e-06, - "loss": 0.7084, - "step": 10358 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.761495711388573e-06, - "loss": 0.5591, - "step": 10359 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.759608729370577e-06, - "loss": 0.5225, - "step": 10360 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.757721876260901e-06, - "loss": 0.5994, - "step": 10361 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.7558351521346e-06, - "loss": 0.6439, - "step": 10362 - }, - { - "epoch": 1.87, - "grad_norm": 0.0, - "learning_rate": 6.7539485570667315e-06, - "loss": 0.6104, - "step": 10363 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.752062091132353e-06, - "loss": 0.5855, - "step": 10364 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.750175754406509e-06, - "loss": 0.66, - "step": 10365 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.74828954696424e-06, - "loss": 0.5819, - "step": 10366 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.746403468880581e-06, - "loss": 0.5753, - "step": 10367 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.744517520230571e-06, - "loss": 0.5581, - "step": 10368 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.742631701089228e-06, - "loss": 0.663, - "step": 10369 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.740746011531576e-06, - "loss": 0.5586, - "step": 10370 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.738860451632632e-06, - "loss": 0.6489, - "step": 10371 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.736975021467402e-06, - "loss": 0.588, - "step": 10372 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.735089721110899e-06, - "loss": 0.5205, - "step": 10373 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.733204550638121e-06, - "loss": 0.6881, - "step": 10374 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.731319510124059e-06, - "loss": 0.6155, - "step": 10375 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.729434599643713e-06, - "loss": 0.5647, - "step": 10376 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.727549819272054e-06, - "loss": 0.6612, - "step": 10377 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.725665169084072e-06, - "loss": 0.5847, - "step": 10378 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.72378064915474e-06, - "loss": 0.6701, - "step": 10379 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.721896259559022e-06, - "loss": 0.5973, - "step": 10380 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.72001200037189e-06, - "loss": 0.5083, - "step": 10381 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.718127871668301e-06, - "loss": 0.7319, - "step": 10382 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.7162438735232076e-06, - "loss": 0.634, - "step": 10383 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.714360006011556e-06, - "loss": 0.6101, - "step": 10384 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.71247626920829e-06, - "loss": 0.6808, - "step": 10385 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.710592663188352e-06, - "loss": 0.592, - "step": 10386 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.7087091880266745e-06, - "loss": 0.5625, - "step": 10387 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.706825843798179e-06, - "loss": 0.6761, - "step": 10388 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.704942630577797e-06, - "loss": 0.5391, - "step": 10389 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.7030595484404415e-06, - "loss": 0.6375, - "step": 10390 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.701176597461027e-06, - "loss": 0.7601, - "step": 10391 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.699293777714457e-06, - "loss": 0.647, - "step": 10392 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.697411089275632e-06, - "loss": 0.6885, - "step": 10393 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.695528532219454e-06, - "loss": 0.6142, - "step": 10394 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.693646106620812e-06, - "loss": 0.7064, - "step": 10395 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.69176381255459e-06, - "loss": 0.5157, - "step": 10396 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.6898816500956735e-06, - "loss": 0.6045, - "step": 10397 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.6879996193189364e-06, - "loss": 0.6143, - "step": 10398 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.686117720299247e-06, - "loss": 0.6065, - "step": 10399 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.684235953111471e-06, - "loss": 0.639, - "step": 10400 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.682354317830466e-06, - "loss": 0.6307, - "step": 10401 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.6804728145310936e-06, - "loss": 0.6652, - "step": 10402 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.678591443288199e-06, - "loss": 0.5855, - "step": 10403 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.676710204176624e-06, - "loss": 0.7164, - "step": 10404 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.674829097271214e-06, - "loss": 0.5855, - "step": 10405 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.6729481226467986e-06, - "loss": 0.6782, - "step": 10406 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.671067280378205e-06, - "loss": 0.5888, - "step": 10407 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.669186570540258e-06, - "loss": 0.6432, - "step": 10408 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.6673059932077735e-06, - "loss": 0.6647, - "step": 10409 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.665425548455567e-06, - "loss": 0.5854, - "step": 10410 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.663545236358445e-06, - "loss": 0.5151, - "step": 10411 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.66166505699121e-06, - "loss": 0.5658, - "step": 10412 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.659785010428654e-06, - "loss": 0.6254, - "step": 10413 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.657905096745578e-06, - "loss": 0.6332, - "step": 10414 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.656025316016761e-06, - "loss": 0.5405, - "step": 10415 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.654145668316985e-06, - "loss": 0.7243, - "step": 10416 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.652266153721025e-06, - "loss": 0.6155, - "step": 10417 - }, - { - "epoch": 1.88, - "grad_norm": 0.0, - "learning_rate": 6.650386772303652e-06, - "loss": 0.6145, - "step": 10418 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.648507524139631e-06, - "loss": 0.7663, - "step": 10419 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.646628409303725e-06, - "loss": 0.7513, - "step": 10420 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.644749427870686e-06, - "loss": 0.6361, - "step": 10421 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.642870579915262e-06, - "loss": 0.5418, - "step": 10422 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.640991865512193e-06, - "loss": 0.5817, - "step": 10423 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.639113284736225e-06, - "loss": 0.6379, - "step": 10424 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.637234837662087e-06, - "loss": 0.5933, - "step": 10425 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.635356524364509e-06, - "loss": 0.6302, - "step": 10426 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.633478344918212e-06, - "loss": 0.5036, - "step": 10427 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.631600299397914e-06, - "loss": 0.505, - "step": 10428 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.629722387878328e-06, - "loss": 0.6276, - "step": 10429 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.627844610434158e-06, - "loss": 0.5852, - "step": 10430 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.625966967140104e-06, - "loss": 0.6687, - "step": 10431 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.624089458070866e-06, - "loss": 0.7069, - "step": 10432 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.622212083301132e-06, - "loss": 0.6285, - "step": 10433 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.6203348429055866e-06, - "loss": 0.5819, - "step": 10434 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.618457736958912e-06, - "loss": 0.6936, - "step": 10435 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.616580765535785e-06, - "loss": 0.6218, - "step": 10436 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.614703928710868e-06, - "loss": 0.5669, - "step": 10437 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.612827226558829e-06, - "loss": 0.7122, - "step": 10438 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.610950659154322e-06, - "loss": 0.6063, - "step": 10439 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.609074226572007e-06, - "loss": 0.6332, - "step": 10440 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.607197928886528e-06, - "loss": 0.772, - "step": 10441 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.605321766172524e-06, - "loss": 0.6942, - "step": 10442 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.603445738504639e-06, - "loss": 0.669, - "step": 10443 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.601569845957503e-06, - "loss": 0.5852, - "step": 10444 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.599694088605739e-06, - "loss": 0.5845, - "step": 10445 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.597818466523968e-06, - "loss": 0.6217, - "step": 10446 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.595942979786805e-06, - "loss": 0.5806, - "step": 10447 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.594067628468863e-06, - "loss": 0.7726, - "step": 10448 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.592192412644746e-06, - "loss": 0.6738, - "step": 10449 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.590317332389052e-06, - "loss": 0.7945, - "step": 10450 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.588442387776381e-06, - "loss": 0.5223, - "step": 10451 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.58656757888131e-06, - "loss": 0.6841, - "step": 10452 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.584692905778429e-06, - "loss": 0.6143, - "step": 10453 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.582818368542317e-06, - "loss": 0.696, - "step": 10454 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.580943967247542e-06, - "loss": 0.6675, - "step": 10455 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.579069701968673e-06, - "loss": 0.546, - "step": 10456 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.577195572780273e-06, - "loss": 0.7932, - "step": 10457 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.575321579756897e-06, - "loss": 0.6481, - "step": 10458 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.573447722973097e-06, - "loss": 0.6261, - "step": 10459 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.571574002503414e-06, - "loss": 0.6002, - "step": 10460 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.56970041842239e-06, - "loss": 0.6809, - "step": 10461 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.56782697080456e-06, - "loss": 0.5251, - "step": 10462 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.565953659724455e-06, - "loss": 0.6342, - "step": 10463 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.564080485256592e-06, - "loss": 0.6205, - "step": 10464 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.562207447475497e-06, - "loss": 0.658, - "step": 10465 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.560334546455678e-06, - "loss": 0.6808, - "step": 10466 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.558461782271646e-06, - "loss": 0.6928, - "step": 10467 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.556589154997897e-06, - "loss": 0.5641, - "step": 10468 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.554716664708929e-06, - "loss": 0.6385, - "step": 10469 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.552844311479235e-06, - "loss": 0.5961, - "step": 10470 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.550972095383301e-06, - "loss": 0.7004, - "step": 10471 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.549100016495602e-06, - "loss": 0.5674, - "step": 10472 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.547228074890619e-06, - "loss": 0.68, - "step": 10473 - }, - { - "epoch": 1.89, - "grad_norm": 0.0, - "learning_rate": 6.545356270642822e-06, - "loss": 0.6584, - "step": 10474 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.543484603826666e-06, - "loss": 0.7388, - "step": 10475 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.541613074516615e-06, - "loss": 0.6462, - "step": 10476 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.539741682787118e-06, - "loss": 0.6372, - "step": 10477 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.537870428712627e-06, - "loss": 0.6797, - "step": 10478 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.53599931236758e-06, - "loss": 0.6367, - "step": 10479 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.534128333826415e-06, - "loss": 0.5501, - "step": 10480 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.532257493163563e-06, - "loss": 0.6124, - "step": 10481 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.530386790453449e-06, - "loss": 0.6015, - "step": 10482 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.528516225770492e-06, - "loss": 0.6375, - "step": 10483 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.526645799189106e-06, - "loss": 0.618, - "step": 10484 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.524775510783699e-06, - "loss": 0.5724, - "step": 10485 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.522905360628676e-06, - "loss": 0.7637, - "step": 10486 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.521035348798436e-06, - "loss": 0.563, - "step": 10487 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.519165475367366e-06, - "loss": 0.6905, - "step": 10488 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.517295740409864e-06, - "loss": 0.6456, - "step": 10489 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.515426144000297e-06, - "loss": 0.5146, - "step": 10490 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.5135566862130495e-06, - "loss": 0.6581, - "step": 10491 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.51168736712249e-06, - "loss": 0.6625, - "step": 10492 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.50981818680298e-06, - "loss": 0.4989, - "step": 10493 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.507949145328883e-06, - "loss": 0.7281, - "step": 10494 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.506080242774553e-06, - "loss": 0.5534, - "step": 10495 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.5042114792143325e-06, - "loss": 0.5874, - "step": 10496 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.502342854722576e-06, - "loss": 0.6564, - "step": 10497 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.5004743693736046e-06, - "loss": 0.6575, - "step": 10498 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.498606023241761e-06, - "loss": 0.6441, - "step": 10499 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.496737816401368e-06, - "loss": 0.634, - "step": 10500 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.494869748926744e-06, - "loss": 0.5591, - "step": 10501 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.4930018208922075e-06, - "loss": 0.6592, - "step": 10502 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.491134032372066e-06, - "loss": 0.6702, - "step": 10503 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.489266383440627e-06, - "loss": 0.6997, - "step": 10504 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.487398874172184e-06, - "loss": 0.6149, - "step": 10505 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.485531504641027e-06, - "loss": 0.6643, - "step": 10506 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.483664274921451e-06, - "loss": 0.6543, - "step": 10507 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.481797185087733e-06, - "loss": 0.5569, - "step": 10508 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.479930235214151e-06, - "loss": 0.5561, - "step": 10509 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.478063425374972e-06, - "loss": 0.6166, - "step": 10510 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.4761967556444634e-06, - "loss": 0.584, - "step": 10511 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.47433022609689e-06, - "loss": 0.6023, - "step": 10512 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.472463836806495e-06, - "loss": 0.5003, - "step": 10513 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.470597587847534e-06, - "loss": 0.5722, - "step": 10514 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.468731479294243e-06, - "loss": 0.5949, - "step": 10515 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.466865511220868e-06, - "loss": 0.6026, - "step": 10516 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.464999683701633e-06, - "loss": 0.5797, - "step": 10517 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.463133996810764e-06, - "loss": 0.7112, - "step": 10518 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.461268450622488e-06, - "loss": 0.6639, - "step": 10519 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.459403045211016e-06, - "loss": 0.5159, - "step": 10520 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.457537780650553e-06, - "loss": 0.6025, - "step": 10521 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.4556726570153085e-06, - "loss": 0.7327, - "step": 10522 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.453807674379473e-06, - "loss": 0.5676, - "step": 10523 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.4519428328172466e-06, - "loss": 0.7254, - "step": 10524 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.450078132402811e-06, - "loss": 0.6181, - "step": 10525 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.448213573210347e-06, - "loss": 0.6626, - "step": 10526 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.446349155314039e-06, - "loss": 0.6618, - "step": 10527 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.4444848787880425e-06, - "loss": 0.6384, - "step": 10528 - }, - { - "epoch": 1.9, - "grad_norm": 0.0, - "learning_rate": 6.44262074370653e-06, - "loss": 0.6443, - "step": 10529 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.44075675014366e-06, - "loss": 0.6008, - "step": 10530 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.438892898173579e-06, - "loss": 0.5515, - "step": 10531 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.437029187870443e-06, - "loss": 0.5601, - "step": 10532 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.43516561930839e-06, - "loss": 0.5924, - "step": 10533 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.4333021925615526e-06, - "loss": 0.6592, - "step": 10534 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.4314389077040705e-06, - "loss": 0.523, - "step": 10535 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.429575764810056e-06, - "loss": 0.586, - "step": 10536 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.4277127639536356e-06, - "loss": 0.6398, - "step": 10537 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.42584990520892e-06, - "loss": 0.6482, - "step": 10538 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.4239871886500185e-06, - "loss": 0.6162, - "step": 10539 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.422124614351033e-06, - "loss": 0.782, - "step": 10540 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.420262182386061e-06, - "loss": 0.5795, - "step": 10541 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.418399892829192e-06, - "loss": 0.6998, - "step": 10542 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.416537745754509e-06, - "loss": 0.5923, - "step": 10543 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.414675741236094e-06, - "loss": 0.6103, - "step": 10544 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.412813879348019e-06, - "loss": 0.5396, - "step": 10545 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.410952160164354e-06, - "loss": 0.702, - "step": 10546 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.409090583759159e-06, - "loss": 0.6037, - "step": 10547 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.407229150206494e-06, - "loss": 0.7362, - "step": 10548 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.405367859580408e-06, - "loss": 0.6021, - "step": 10549 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.403506711954948e-06, - "loss": 0.6042, - "step": 10550 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.401645707404151e-06, - "loss": 0.5254, - "step": 10551 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.39978484600205e-06, - "loss": 0.5997, - "step": 10552 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.397924127822677e-06, - "loss": 0.577, - "step": 10553 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.396063552940053e-06, - "loss": 0.5789, - "step": 10554 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.394203121428195e-06, - "loss": 0.6556, - "step": 10555 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.392342833361112e-06, - "loss": 0.613, - "step": 10556 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.390482688812814e-06, - "loss": 0.6425, - "step": 10557 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.388622687857298e-06, - "loss": 0.5529, - "step": 10558 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.386762830568556e-06, - "loss": 0.6318, - "step": 10559 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.38490311702058e-06, - "loss": 0.685, - "step": 10560 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.3830435472873485e-06, - "loss": 0.6107, - "step": 10561 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.381184121442843e-06, - "loss": 0.565, - "step": 10562 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.3793248395610315e-06, - "loss": 0.5003, - "step": 10563 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.3774657017158796e-06, - "loss": 0.6851, - "step": 10564 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.375606707981354e-06, - "loss": 0.6232, - "step": 10565 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.373747858431396e-06, - "loss": 0.657, - "step": 10566 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.371889153139963e-06, - "loss": 0.6495, - "step": 10567 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.370030592180993e-06, - "loss": 0.6796, - "step": 10568 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.368172175628425e-06, - "loss": 0.6943, - "step": 10569 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.3663139035561894e-06, - "loss": 0.6627, - "step": 10570 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.364455776038212e-06, - "loss": 0.5912, - "step": 10571 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.362597793148411e-06, - "loss": 0.6332, - "step": 10572 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.360739954960706e-06, - "loss": 0.5502, - "step": 10573 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.358882261548994e-06, - "loss": 0.5782, - "step": 10574 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.357024712987186e-06, - "loss": 0.6607, - "step": 10575 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.355167309349175e-06, - "loss": 0.6388, - "step": 10576 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.353310050708849e-06, - "loss": 0.5438, - "step": 10577 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.351452937140101e-06, - "loss": 0.52, - "step": 10578 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.349595968716804e-06, - "loss": 0.5979, - "step": 10579 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.3477391455128355e-06, - "loss": 0.5664, - "step": 10580 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.345882467602058e-06, - "loss": 0.6497, - "step": 10581 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.344025935058333e-06, - "loss": 0.5025, - "step": 10582 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.342169547955523e-06, - "loss": 0.7075, - "step": 10583 - }, - { - "epoch": 1.91, - "grad_norm": 0.0, - "learning_rate": 6.340313306367474e-06, - "loss": 0.6534, - "step": 10584 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.338457210368029e-06, - "loss": 0.5942, - "step": 10585 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.33660126003103e-06, - "loss": 0.6255, - "step": 10586 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.334745455430309e-06, - "loss": 0.5959, - "step": 10587 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.332889796639696e-06, - "loss": 0.5568, - "step": 10588 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.331034283733006e-06, - "loss": 0.6777, - "step": 10589 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.329178916784056e-06, - "loss": 0.6078, - "step": 10590 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.327323695866658e-06, - "loss": 0.5849, - "step": 10591 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.325468621054616e-06, - "loss": 0.475, - "step": 10592 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.323613692421724e-06, - "loss": 0.6132, - "step": 10593 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.3217589100417795e-06, - "loss": 0.733, - "step": 10594 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.31990427398857e-06, - "loss": 0.5794, - "step": 10595 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.31804978433587e-06, - "loss": 0.6074, - "step": 10596 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.316195441157458e-06, - "loss": 0.767, - "step": 10597 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.3143412445270975e-06, - "loss": 0.7611, - "step": 10598 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.31248719451856e-06, - "loss": 0.5417, - "step": 10599 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.310633291205599e-06, - "loss": 0.5423, - "step": 10600 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.3087795346619666e-06, - "loss": 0.5934, - "step": 10601 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.3069259249614046e-06, - "loss": 0.6206, - "step": 10602 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.30507246217766e-06, - "loss": 0.6076, - "step": 10603 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.3032191463844605e-06, - "loss": 0.6201, - "step": 10604 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.301365977655538e-06, - "loss": 0.6153, - "step": 10605 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.29951295606461e-06, - "loss": 0.571, - "step": 10606 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.297660081685397e-06, - "loss": 0.6737, - "step": 10607 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.295807354591609e-06, - "loss": 0.5716, - "step": 10608 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.29395477485695e-06, - "loss": 0.723, - "step": 10609 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.292102342555116e-06, - "loss": 0.6031, - "step": 10610 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.29025005775981e-06, - "loss": 0.669, - "step": 10611 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.288397920544706e-06, - "loss": 0.7073, - "step": 10612 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.286545930983492e-06, - "loss": 0.5468, - "step": 10613 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.284694089149843e-06, - "loss": 0.6233, - "step": 10614 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.282842395117426e-06, - "loss": 0.6183, - "step": 10615 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.280990848959908e-06, - "loss": 0.5178, - "step": 10616 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.279139450750945e-06, - "loss": 0.5939, - "step": 10617 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.277288200564191e-06, - "loss": 0.6688, - "step": 10618 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.275437098473288e-06, - "loss": 0.574, - "step": 10619 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.273586144551875e-06, - "loss": 0.6825, - "step": 10620 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.271735338873592e-06, - "loss": 0.6218, - "step": 10621 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.269884681512064e-06, - "loss": 0.5351, - "step": 10622 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.26803417254091e-06, - "loss": 0.5655, - "step": 10623 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.266183812033753e-06, - "loss": 0.6372, - "step": 10624 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.264333600064199e-06, - "loss": 0.6295, - "step": 10625 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.262483536705858e-06, - "loss": 0.7168, - "step": 10626 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.260633622032322e-06, - "loss": 0.6932, - "step": 10627 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.258783856117184e-06, - "loss": 0.6038, - "step": 10628 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.256934239034035e-06, - "loss": 0.5721, - "step": 10629 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.2550847708564546e-06, - "loss": 0.5973, - "step": 10630 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.2532354516580155e-06, - "loss": 0.5756, - "step": 10631 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.25138628151229e-06, - "loss": 0.6131, - "step": 10632 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.249537260492842e-06, - "loss": 0.6369, - "step": 10633 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.247688388673225e-06, - "loss": 0.6955, - "step": 10634 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.245839666126992e-06, - "loss": 0.6343, - "step": 10635 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.243991092927687e-06, - "loss": 0.6698, - "step": 10636 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.242142669148852e-06, - "loss": 0.6453, - "step": 10637 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.240294394864019e-06, - "loss": 0.6602, - "step": 10638 - }, - { - "epoch": 1.92, - "grad_norm": 0.0, - "learning_rate": 6.238446270146714e-06, - "loss": 0.5395, - "step": 10639 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.236598295070462e-06, - "loss": 0.6123, - "step": 10640 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.234750469708779e-06, - "loss": 0.6737, - "step": 10641 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.232902794135172e-06, - "loss": 0.6474, - "step": 10642 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.231055268423143e-06, - "loss": 0.5309, - "step": 10643 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.229207892646192e-06, - "loss": 0.5904, - "step": 10644 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.227360666877811e-06, - "loss": 0.7133, - "step": 10645 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.225513591191485e-06, - "loss": 0.5737, - "step": 10646 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.223666665660697e-06, - "loss": 0.5322, - "step": 10647 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.221819890358919e-06, - "loss": 0.5445, - "step": 10648 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.219973265359613e-06, - "loss": 0.6895, - "step": 10649 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.218126790736249e-06, - "loss": 0.578, - "step": 10650 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.216280466562281e-06, - "loss": 0.6189, - "step": 10651 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.214434292911156e-06, - "loss": 0.6217, - "step": 10652 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.212588269856319e-06, - "loss": 0.6125, - "step": 10653 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.210742397471212e-06, - "loss": 0.6552, - "step": 10654 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.208896675829261e-06, - "loss": 0.6095, - "step": 10655 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.207051105003898e-06, - "loss": 0.6266, - "step": 10656 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.205205685068538e-06, - "loss": 0.5975, - "step": 10657 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.203360416096596e-06, - "loss": 0.645, - "step": 10658 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.20151529816148e-06, - "loss": 0.6293, - "step": 10659 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.199670331336595e-06, - "loss": 0.5748, - "step": 10660 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.197825515695331e-06, - "loss": 0.5759, - "step": 10661 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1959808513110844e-06, - "loss": 0.6798, - "step": 10662 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1941363382572354e-06, - "loss": 0.5229, - "step": 10663 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.192291976607166e-06, - "loss": 0.5944, - "step": 10664 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.19044776643424e-06, - "loss": 0.5308, - "step": 10665 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.188603707811829e-06, - "loss": 0.6014, - "step": 10666 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.186759800813291e-06, - "loss": 0.591, - "step": 10667 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.184916045511983e-06, - "loss": 0.7071, - "step": 10668 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1830724419812475e-06, - "loss": 0.607, - "step": 10669 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.18122899029443e-06, - "loss": 0.5903, - "step": 10670 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.179385690524868e-06, - "loss": 0.6554, - "step": 10671 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.177542542745887e-06, - "loss": 0.5939, - "step": 10672 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.175699547030811e-06, - "loss": 0.645, - "step": 10673 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.173856703452956e-06, - "loss": 0.6011, - "step": 10674 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.172014012085638e-06, - "loss": 0.6683, - "step": 10675 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1701714730021625e-06, - "loss": 0.5514, - "step": 10676 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.168329086275822e-06, - "loss": 0.622, - "step": 10677 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.166486851979918e-06, - "loss": 0.5541, - "step": 10678 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.164644770187735e-06, - "loss": 0.588, - "step": 10679 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.162802840972553e-06, - "loss": 0.7029, - "step": 10680 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.160961064407646e-06, - "loss": 0.646, - "step": 10681 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.159119440566283e-06, - "loss": 0.6321, - "step": 10682 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.157277969521731e-06, - "loss": 0.6567, - "step": 10683 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.155436651347243e-06, - "loss": 0.5354, - "step": 10684 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1535954861160695e-06, - "loss": 0.6624, - "step": 10685 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.151754473901464e-06, - "loss": 0.5072, - "step": 10686 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.14991361477665e-06, - "loss": 0.5536, - "step": 10687 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.148072908814872e-06, - "loss": 0.5157, - "step": 10688 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.146232356089351e-06, - "loss": 0.5784, - "step": 10689 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.144391956673308e-06, - "loss": 0.6078, - "step": 10690 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.142551710639959e-06, - "loss": 0.5883, - "step": 10691 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.140711618062512e-06, - "loss": 0.6838, - "step": 10692 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.138871679014167e-06, - "loss": 0.6498, - "step": 10693 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.137031893568124e-06, - "loss": 0.7262, - "step": 10694 - }, - { - "epoch": 1.93, - "grad_norm": 0.0, - "learning_rate": 6.1351922617975644e-06, - "loss": 0.6342, - "step": 10695 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.133352783775681e-06, - "loss": 0.7599, - "step": 10696 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.1315134595756464e-06, - "loss": 0.6661, - "step": 10697 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.129674289270635e-06, - "loss": 0.5964, - "step": 10698 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.127835272933809e-06, - "loss": 0.6238, - "step": 10699 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.125996410638329e-06, - "loss": 0.5788, - "step": 10700 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.124157702457352e-06, - "loss": 0.5438, - "step": 10701 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.122319148464018e-06, - "loss": 0.6609, - "step": 10702 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.120480748731472e-06, - "loss": 0.7276, - "step": 10703 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.118642503332846e-06, - "loss": 0.5289, - "step": 10704 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.116804412341272e-06, - "loss": 0.5646, - "step": 10705 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.114966475829871e-06, - "loss": 0.641, - "step": 10706 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.113128693871757e-06, - "loss": 0.5448, - "step": 10707 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.1112910665400435e-06, - "loss": 0.7248, - "step": 10708 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.109453593907836e-06, - "loss": 0.7415, - "step": 10709 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.1076162760482275e-06, - "loss": 0.641, - "step": 10710 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.10577911303431e-06, - "loss": 0.5979, - "step": 10711 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.10394210493917e-06, - "loss": 0.5553, - "step": 10712 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.102105251835888e-06, - "loss": 0.6298, - "step": 10713 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.1002685537975396e-06, - "loss": 0.6424, - "step": 10714 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.098432010897184e-06, - "loss": 0.7007, - "step": 10715 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.096595623207888e-06, - "loss": 0.7324, - "step": 10716 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.094759390802709e-06, - "loss": 0.62, - "step": 10717 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.092923313754689e-06, - "loss": 0.5475, - "step": 10718 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.091087392136873e-06, - "loss": 0.6105, - "step": 10719 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.089251626022295e-06, - "loss": 0.5508, - "step": 10720 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0874160154839875e-06, - "loss": 0.7055, - "step": 10721 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.085580560594975e-06, - "loss": 0.6464, - "step": 10722 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.083745261428271e-06, - "loss": 0.6193, - "step": 10723 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.081910118056895e-06, - "loss": 0.558, - "step": 10724 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.080075130553842e-06, - "loss": 0.6026, - "step": 10725 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.078240298992115e-06, - "loss": 0.5533, - "step": 10726 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.07640562344471e-06, - "loss": 0.522, - "step": 10727 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.074571103984608e-06, - "loss": 0.7285, - "step": 10728 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0727367406847924e-06, - "loss": 0.6166, - "step": 10729 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0709025336182396e-06, - "loss": 0.6001, - "step": 10730 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.06906848285791e-06, - "loss": 0.6972, - "step": 10731 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.067234588476778e-06, - "loss": 0.6699, - "step": 10732 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0654008505477846e-06, - "loss": 0.5916, - "step": 10733 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0635672691438885e-06, - "loss": 0.5989, - "step": 10734 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.06173384433803e-06, - "loss": 0.5727, - "step": 10735 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.059900576203144e-06, - "loss": 0.6249, - "step": 10736 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0580674648121625e-06, - "loss": 0.6262, - "step": 10737 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.056234510238013e-06, - "loss": 0.6338, - "step": 10738 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0544017125536125e-06, - "loss": 0.5426, - "step": 10739 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.052569071831867e-06, - "loss": 0.5604, - "step": 10740 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.050736588145687e-06, - "loss": 0.5459, - "step": 10741 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.048904261567969e-06, - "loss": 0.5774, - "step": 10742 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.047072092171611e-06, - "loss": 0.6352, - "step": 10743 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.045240080029498e-06, - "loss": 0.6155, - "step": 10744 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0434082252145044e-06, - "loss": 0.5775, - "step": 10745 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.041576527799513e-06, - "loss": 0.7071, - "step": 10746 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.03974498785739e-06, - "loss": 0.6097, - "step": 10747 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.037913605460995e-06, - "loss": 0.613, - "step": 10748 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.0360823806831835e-06, - "loss": 0.5775, - "step": 10749 - }, - { - "epoch": 1.94, - "grad_norm": 0.0, - "learning_rate": 6.034251313596804e-06, - "loss": 0.6089, - "step": 10750 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.032420404274701e-06, - "loss": 0.6152, - "step": 10751 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.030589652789713e-06, - "loss": 0.5668, - "step": 10752 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.028759059214666e-06, - "loss": 0.6813, - "step": 10753 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.026928623622392e-06, - "loss": 0.5558, - "step": 10754 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.025098346085699e-06, - "loss": 0.6161, - "step": 10755 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.023268226677405e-06, - "loss": 0.6703, - "step": 10756 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.021438265470313e-06, - "loss": 0.6355, - "step": 10757 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.019608462537221e-06, - "loss": 0.6832, - "step": 10758 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.017778817950923e-06, - "loss": 0.7565, - "step": 10759 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.0159493317842074e-06, - "loss": 0.6058, - "step": 10760 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.0141200041098516e-06, - "loss": 0.6524, - "step": 10761 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.0122908350006335e-06, - "loss": 0.5988, - "step": 10762 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.010461824529313e-06, - "loss": 0.6038, - "step": 10763 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.008632972768658e-06, - "loss": 0.665, - "step": 10764 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.006804279791419e-06, - "loss": 0.5239, - "step": 10765 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.004975745670346e-06, - "loss": 0.6106, - "step": 10766 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.003147370478184e-06, - "loss": 0.6267, - "step": 10767 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 6.001319154287666e-06, - "loss": 0.6729, - "step": 10768 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.999491097171519e-06, - "loss": 0.5928, - "step": 10769 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.997663199202477e-06, - "loss": 0.6448, - "step": 10770 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.995835460453243e-06, - "loss": 0.5994, - "step": 10771 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9940078809965354e-06, - "loss": 0.581, - "step": 10772 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.992180460905057e-06, - "loss": 0.5711, - "step": 10773 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.990353200251504e-06, - "loss": 0.7237, - "step": 10774 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.988526099108571e-06, - "loss": 0.6096, - "step": 10775 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9866991575489434e-06, - "loss": 0.7368, - "step": 10776 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.984872375645299e-06, - "loss": 0.6282, - "step": 10777 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.983045753470308e-06, - "loss": 0.5898, - "step": 10778 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.981219291096636e-06, - "loss": 0.6913, - "step": 10779 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9793929885969484e-06, - "loss": 0.6986, - "step": 10780 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.977566846043894e-06, - "loss": 0.609, - "step": 10781 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.975740863510122e-06, - "loss": 0.5976, - "step": 10782 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.97391504106827e-06, - "loss": 0.6011, - "step": 10783 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.972089378790977e-06, - "loss": 0.562, - "step": 10784 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.970263876750871e-06, - "loss": 0.6852, - "step": 10785 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.96843853502057e-06, - "loss": 0.6256, - "step": 10786 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.96661335367269e-06, - "loss": 0.588, - "step": 10787 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9647883327798375e-06, - "loss": 0.5707, - "step": 10788 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.96296347241462e-06, - "loss": 0.6828, - "step": 10789 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.961138772649632e-06, - "loss": 0.5912, - "step": 10790 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9593142335574605e-06, - "loss": 0.7339, - "step": 10791 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.957489855210696e-06, - "loss": 0.6938, - "step": 10792 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.955665637681904e-06, - "loss": 0.7325, - "step": 10793 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.953841581043664e-06, - "loss": 0.567, - "step": 10794 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9520176853685386e-06, - "loss": 0.7142, - "step": 10795 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.950193950729082e-06, - "loss": 0.6449, - "step": 10796 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9483703771978475e-06, - "loss": 0.5481, - "step": 10797 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.946546964847381e-06, - "loss": 0.5761, - "step": 10798 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.9447237137502204e-06, - "loss": 0.6768, - "step": 10799 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.942900623978902e-06, - "loss": 0.5935, - "step": 10800 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.941077695605941e-06, - "loss": 0.5667, - "step": 10801 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.939254928703864e-06, - "loss": 0.558, - "step": 10802 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.937432323345183e-06, - "loss": 0.5881, - "step": 10803 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.935609879602402e-06, - "loss": 0.5183, - "step": 10804 - }, - { - "epoch": 1.95, - "grad_norm": 0.0, - "learning_rate": 5.933787597548024e-06, - "loss": 0.6293, - "step": 10805 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.931965477254542e-06, - "loss": 0.7719, - "step": 10806 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.930143518794442e-06, - "loss": 0.6439, - "step": 10807 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.928321722240205e-06, - "loss": 0.6329, - "step": 10808 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.926500087664303e-06, - "loss": 0.6375, - "step": 10809 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.9246786151392076e-06, - "loss": 0.6406, - "step": 10810 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.922857304737378e-06, - "loss": 0.7518, - "step": 10811 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.9210361565312675e-06, - "loss": 0.5423, - "step": 10812 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.9192151705933286e-06, - "loss": 0.6111, - "step": 10813 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.917394346996001e-06, - "loss": 0.5507, - "step": 10814 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.915573685811722e-06, - "loss": 0.6491, - "step": 10815 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.913753187112916e-06, - "loss": 0.5697, - "step": 10816 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.911932850972007e-06, - "loss": 0.6167, - "step": 10817 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.910112677461415e-06, - "loss": 0.6189, - "step": 10818 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.908292666653547e-06, - "loss": 0.743, - "step": 10819 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.906472818620803e-06, - "loss": 0.5723, - "step": 10820 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.904653133435586e-06, - "loss": 0.6666, - "step": 10821 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.902833611170282e-06, - "loss": 0.6591, - "step": 10822 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.901014251897279e-06, - "loss": 0.6004, - "step": 10823 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.899195055688948e-06, - "loss": 0.5625, - "step": 10824 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8973760226176625e-06, - "loss": 0.5353, - "step": 10825 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.895557152755787e-06, - "loss": 0.5909, - "step": 10826 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8937384461756805e-06, - "loss": 0.5979, - "step": 10827 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.891919902949692e-06, - "loss": 0.658, - "step": 10828 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8901015231501665e-06, - "loss": 0.5939, - "step": 10829 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.888283306849446e-06, - "loss": 0.6354, - "step": 10830 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8864652541198575e-06, - "loss": 0.5655, - "step": 10831 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.884647365033729e-06, - "loss": 0.5817, - "step": 10832 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.882829639663377e-06, - "loss": 0.7678, - "step": 10833 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8810120780811145e-06, - "loss": 0.519, - "step": 10834 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8791946803592484e-06, - "loss": 0.5806, - "step": 10835 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.877377446570077e-06, - "loss": 0.59, - "step": 10836 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.875560376785892e-06, - "loss": 0.6209, - "step": 10837 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8737434710789875e-06, - "loss": 0.6558, - "step": 10838 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.871926729521629e-06, - "loss": 0.6406, - "step": 10839 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.870110152186099e-06, - "loss": 0.7359, - "step": 10840 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8682937391446635e-06, - "loss": 0.6816, - "step": 10841 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.866477490469577e-06, - "loss": 0.5409, - "step": 10842 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.864661406233101e-06, - "loss": 0.7578, - "step": 10843 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.862845486507478e-06, - "loss": 0.7489, - "step": 10844 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.861029731364951e-06, - "loss": 0.5921, - "step": 10845 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8592141408777495e-06, - "loss": 0.617, - "step": 10846 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8573987151181e-06, - "loss": 0.6588, - "step": 10847 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.855583454158229e-06, - "loss": 0.6197, - "step": 10848 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.853768358070349e-06, - "loss": 0.6042, - "step": 10849 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.851953426926663e-06, - "loss": 0.5816, - "step": 10850 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.850138660799378e-06, - "loss": 0.6133, - "step": 10851 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.848324059760687e-06, - "loss": 0.6393, - "step": 10852 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.846509623882779e-06, - "loss": 0.6325, - "step": 10853 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8446953532378304e-06, - "loss": 0.4948, - "step": 10854 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.842881247898018e-06, - "loss": 0.5499, - "step": 10855 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.841067307935513e-06, - "loss": 0.6854, - "step": 10856 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.8392535334224755e-06, - "loss": 0.5911, - "step": 10857 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.837439924431057e-06, - "loss": 0.7511, - "step": 10858 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.835626481033413e-06, - "loss": 0.7155, - "step": 10859 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.833813203301684e-06, - "loss": 0.6173, - "step": 10860 - }, - { - "epoch": 1.96, - "grad_norm": 0.0, - "learning_rate": 5.832000091308e-06, - "loss": 0.6002, - "step": 10861 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.8301871451244875e-06, - "loss": 0.7168, - "step": 10862 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.8283743648232785e-06, - "loss": 0.6622, - "step": 10863 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.826561750476483e-06, - "loss": 0.6262, - "step": 10864 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.8247493021562105e-06, - "loss": 0.5412, - "step": 10865 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.822937019934564e-06, - "loss": 0.6032, - "step": 10866 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.821124903883638e-06, - "loss": 0.6631, - "step": 10867 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.819312954075522e-06, - "loss": 0.5571, - "step": 10868 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.817501170582299e-06, - "loss": 0.6168, - "step": 10869 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.815689553476045e-06, - "loss": 0.7678, - "step": 10870 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.813878102828827e-06, - "loss": 0.6499, - "step": 10871 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.812066818712712e-06, - "loss": 0.6597, - "step": 10872 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.8102557011997475e-06, - "loss": 0.5328, - "step": 10873 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.808444750361992e-06, - "loss": 0.6385, - "step": 10874 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.806633966271485e-06, - "loss": 0.5247, - "step": 10875 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.804823349000268e-06, - "loss": 0.6632, - "step": 10876 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.8030128986203595e-06, - "loss": 0.6775, - "step": 10877 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.801202615203785e-06, - "loss": 0.579, - "step": 10878 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.799392498822566e-06, - "loss": 0.4951, - "step": 10879 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.79758254954871e-06, - "loss": 0.5927, - "step": 10880 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.795772767454219e-06, - "loss": 0.5736, - "step": 10881 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.79396315261109e-06, - "loss": 0.587, - "step": 10882 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.792153705091312e-06, - "loss": 0.5846, - "step": 10883 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.790344424966869e-06, - "loss": 0.6593, - "step": 10884 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.788535312309735e-06, - "loss": 0.627, - "step": 10885 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.786726367191881e-06, - "loss": 0.5943, - "step": 10886 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7849175896852705e-06, - "loss": 0.5658, - "step": 10887 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.783108979861859e-06, - "loss": 0.5748, - "step": 10888 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7813005377935925e-06, - "loss": 0.6426, - "step": 10889 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.779492263552421e-06, - "loss": 0.5247, - "step": 10890 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.777684157210282e-06, - "loss": 0.6685, - "step": 10891 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.775876218839096e-06, - "loss": 0.5783, - "step": 10892 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.77406844851079e-06, - "loss": 0.6143, - "step": 10893 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7722608462972775e-06, - "loss": 0.5949, - "step": 10894 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.770453412270474e-06, - "loss": 0.6033, - "step": 10895 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.768646146502279e-06, - "loss": 0.6211, - "step": 10896 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.76683904906459e-06, - "loss": 0.6152, - "step": 10897 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.765032120029299e-06, - "loss": 0.5462, - "step": 10898 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.763225359468278e-06, - "loss": 0.7084, - "step": 10899 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.761418767453413e-06, - "loss": 0.4999, - "step": 10900 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.759612344056571e-06, - "loss": 0.553, - "step": 10901 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.757806089349616e-06, - "loss": 0.6892, - "step": 10902 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7560000034043995e-06, - "loss": 0.465, - "step": 10903 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.754194086292775e-06, - "loss": 0.6469, - "step": 10904 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.752388338086581e-06, - "loss": 0.7563, - "step": 10905 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.750582758857656e-06, - "loss": 0.5851, - "step": 10906 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.74877734867783e-06, - "loss": 0.687, - "step": 10907 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7469721076189235e-06, - "loss": 0.6035, - "step": 10908 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.745167035752752e-06, - "loss": 0.651, - "step": 10909 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.743362133151125e-06, - "loss": 0.571, - "step": 10910 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.74155739988584e-06, - "loss": 0.5259, - "step": 10911 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.7397528360287e-06, - "loss": 0.5878, - "step": 10912 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.737948441651495e-06, - "loss": 0.5548, - "step": 10913 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.736144216826e-06, - "loss": 0.638, - "step": 10914 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.73434016162399e-06, - "loss": 0.671, - "step": 10915 - }, - { - "epoch": 1.97, - "grad_norm": 0.0, - "learning_rate": 5.732536276117233e-06, - "loss": 0.5662, - "step": 10916 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.730732560377498e-06, - "loss": 0.6645, - "step": 10917 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.728929014476534e-06, - "loss": 0.5113, - "step": 10918 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.727125638486092e-06, - "loss": 0.5883, - "step": 10919 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.725322432477911e-06, - "loss": 0.5639, - "step": 10920 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.723519396523726e-06, - "loss": 0.6291, - "step": 10921 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.721716530695267e-06, - "loss": 0.6661, - "step": 10922 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.719913835064252e-06, - "loss": 0.5781, - "step": 10923 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.7181113097023985e-06, - "loss": 0.6349, - "step": 10924 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.716308954681411e-06, - "loss": 0.709, - "step": 10925 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.714506770072993e-06, - "loss": 0.6484, - "step": 10926 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.712704755948833e-06, - "loss": 0.5502, - "step": 10927 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.710902912380625e-06, - "loss": 0.6286, - "step": 10928 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.7091012394400515e-06, - "loss": 0.6459, - "step": 10929 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.707299737198777e-06, - "loss": 0.5537, - "step": 10930 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.705498405728474e-06, - "loss": 0.6354, - "step": 10931 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.7036972451007946e-06, - "loss": 0.5727, - "step": 10932 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.701896255387405e-06, - "loss": 0.6495, - "step": 10933 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.700095436659945e-06, - "loss": 0.6654, - "step": 10934 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.698294788990056e-06, - "loss": 0.6874, - "step": 10935 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.696494312449371e-06, - "loss": 0.5487, - "step": 10936 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.694694007109507e-06, - "loss": 0.6195, - "step": 10937 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.692893873042096e-06, - "loss": 0.6378, - "step": 10938 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.691093910318746e-06, - "loss": 0.67, - "step": 10939 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.689294119011059e-06, - "loss": 0.6301, - "step": 10940 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.687494499190638e-06, - "loss": 0.5659, - "step": 10941 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.685695050929074e-06, - "loss": 0.5894, - "step": 10942 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.6838957742979515e-06, - "loss": 0.5799, - "step": 10943 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.682096669368848e-06, - "loss": 0.6512, - "step": 10944 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.680297736213338e-06, - "loss": 0.6779, - "step": 10945 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.678498974902983e-06, - "loss": 0.5706, - "step": 10946 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.6767003855093415e-06, - "loss": 0.601, - "step": 10947 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.674901968103965e-06, - "loss": 0.6094, - "step": 10948 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.673103722758394e-06, - "loss": 0.5348, - "step": 10949 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.671305649544173e-06, - "loss": 0.6808, - "step": 10950 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.669507748532833e-06, - "loss": 0.5812, - "step": 10951 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.667710019795889e-06, - "loss": 0.5995, - "step": 10952 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.6659124634048625e-06, - "loss": 0.6902, - "step": 10953 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.664115079431258e-06, - "loss": 0.566, - "step": 10954 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.662317867946589e-06, - "loss": 0.5856, - "step": 10955 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.660520829022344e-06, - "loss": 0.5836, - "step": 10956 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.658723962730016e-06, - "loss": 0.6429, - "step": 10957 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.656927269141086e-06, - "loss": 0.5683, - "step": 10958 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.655130748327028e-06, - "loss": 0.5682, - "step": 10959 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.653334400359311e-06, - "loss": 0.6269, - "step": 10960 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.651538225309401e-06, - "loss": 0.6789, - "step": 10961 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.649742223248747e-06, - "loss": 0.6749, - "step": 10962 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.6479463942488025e-06, - "loss": 0.6001, - "step": 10963 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.646150738381003e-06, - "loss": 0.6312, - "step": 10964 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.644355255716783e-06, - "loss": 0.6869, - "step": 10965 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.642559946327581e-06, - "loss": 0.4921, - "step": 10966 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.640764810284804e-06, - "loss": 0.5096, - "step": 10967 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.638969847659872e-06, - "loss": 0.5931, - "step": 10968 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.637175058524189e-06, - "loss": 0.5782, - "step": 10969 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.635380442949152e-06, - "loss": 0.6314, - "step": 10970 - }, - { - "epoch": 1.98, - "grad_norm": 0.0, - "learning_rate": 5.633586001006164e-06, - "loss": 0.5515, - "step": 10971 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.631791732766603e-06, - "loss": 0.6551, - "step": 10972 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.62999763830185e-06, - "loss": 0.6691, - "step": 10973 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.628203717683282e-06, - "loss": 0.594, - "step": 10974 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.626409970982252e-06, - "loss": 0.6149, - "step": 10975 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.624616398270129e-06, - "loss": 0.5748, - "step": 10976 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.62282299961826e-06, - "loss": 0.7909, - "step": 10977 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.621029775097992e-06, - "loss": 0.6204, - "step": 10978 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.6192367247806604e-06, - "loss": 0.5962, - "step": 10979 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.617443848737596e-06, - "loss": 0.6006, - "step": 10980 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.615651147040121e-06, - "loss": 0.6748, - "step": 10981 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.61385861975956e-06, - "loss": 0.5493, - "step": 10982 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.612066266967215e-06, - "loss": 0.6102, - "step": 10983 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.61027408873439e-06, - "loss": 0.7001, - "step": 10984 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.608482085132382e-06, - "loss": 0.7008, - "step": 10985 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.606690256232474e-06, - "loss": 0.5323, - "step": 10986 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.6048986021059606e-06, - "loss": 0.5748, - "step": 10987 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.603107122824109e-06, - "loss": 0.5682, - "step": 10988 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.60131581845819e-06, - "loss": 0.5723, - "step": 10989 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.599524689079463e-06, - "loss": 0.4762, - "step": 10990 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5977337347591765e-06, - "loss": 0.5685, - "step": 10991 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.595942955568587e-06, - "loss": 0.5573, - "step": 10992 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.594152351578932e-06, - "loss": 0.6258, - "step": 10993 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.592361922861443e-06, - "loss": 0.7004, - "step": 10994 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.590571669487348e-06, - "loss": 0.5261, - "step": 10995 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.588781591527866e-06, - "loss": 0.617, - "step": 10996 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.58699168905421e-06, - "loss": 0.5278, - "step": 10997 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.585201962137583e-06, - "loss": 0.5382, - "step": 10998 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.583412410849185e-06, - "loss": 0.6115, - "step": 10999 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.581623035260208e-06, - "loss": 0.609, - "step": 11000 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5798338354418365e-06, - "loss": 0.6833, - "step": 11001 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.578044811465246e-06, - "loss": 0.521, - "step": 11002 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5762559634016045e-06, - "loss": 0.678, - "step": 11003 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5744672913220874e-06, - "loss": 0.6032, - "step": 11004 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.572678795297838e-06, - "loss": 0.6849, - "step": 11005 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.57089047540001e-06, - "loss": 0.5718, - "step": 11006 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.569102331699748e-06, - "loss": 0.6707, - "step": 11007 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.567314364268182e-06, - "loss": 0.663, - "step": 11008 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.565526573176447e-06, - "loss": 0.6105, - "step": 11009 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5637389584956615e-06, - "loss": 0.6414, - "step": 11010 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.56195152029694e-06, - "loss": 0.6625, - "step": 11011 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.560164258651395e-06, - "loss": 0.5905, - "step": 11012 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.558377173630113e-06, - "loss": 0.5494, - "step": 11013 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5565902653042e-06, - "loss": 0.6418, - "step": 11014 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.554803533744737e-06, - "loss": 0.6153, - "step": 11015 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.553016979022806e-06, - "loss": 0.5678, - "step": 11016 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.551230601209478e-06, - "loss": 0.5252, - "step": 11017 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5494444003758165e-06, - "loss": 0.6188, - "step": 11018 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.547658376592882e-06, - "loss": 0.5783, - "step": 11019 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.545872529931724e-06, - "loss": 0.6683, - "step": 11020 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.5440868604633865e-06, - "loss": 0.6604, - "step": 11021 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.542301368258908e-06, - "loss": 0.6224, - "step": 11022 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.540516053389318e-06, - "loss": 0.6238, - "step": 11023 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.538730915925634e-06, - "loss": 0.6269, - "step": 11024 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.53694595593888e-06, - "loss": 0.5986, - "step": 11025 - }, - { - "epoch": 1.99, - "grad_norm": 0.0, - "learning_rate": 5.535161173500063e-06, - "loss": 0.5564, - "step": 11026 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.533376568680187e-06, - "loss": 0.6516, - "step": 11027 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.531592141550238e-06, - "loss": 0.6376, - "step": 11028 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.529807892181205e-06, - "loss": 0.6309, - "step": 11029 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.528023820644076e-06, - "loss": 0.6413, - "step": 11030 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.52623992700982e-06, - "loss": 0.6534, - "step": 11031 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.524456211349405e-06, - "loss": 0.6715, - "step": 11032 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.522672673733786e-06, - "loss": 0.6886, - "step": 11033 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.520889314233922e-06, - "loss": 0.6891, - "step": 11034 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.519106132920751e-06, - "loss": 0.5553, - "step": 11035 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.517323129865215e-06, - "loss": 0.6015, - "step": 11036 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.515540305138245e-06, - "loss": 0.6919, - "step": 11037 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.513757658810763e-06, - "loss": 0.5544, - "step": 11038 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.511975190953688e-06, - "loss": 0.6894, - "step": 11039 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.510192901637927e-06, - "loss": 0.6257, - "step": 11040 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.508410790934381e-06, - "loss": 0.6054, - "step": 11041 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.506628858913956e-06, - "loss": 0.6384, - "step": 11042 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.504847105647527e-06, - "loss": 0.5862, - "step": 11043 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.503065531205982e-06, - "loss": 0.6829, - "step": 11044 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.501284135660194e-06, - "loss": 0.5113, - "step": 11045 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.499502919081026e-06, - "loss": 0.7084, - "step": 11046 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.497721881539345e-06, - "loss": 0.5115, - "step": 11047 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.495941023106001e-06, - "loss": 0.5993, - "step": 11048 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.494160343851838e-06, - "loss": 0.5948, - "step": 11049 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4923798438477e-06, - "loss": 0.635, - "step": 11050 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.490599523164406e-06, - "loss": 0.639, - "step": 11051 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.488819381872792e-06, - "loss": 0.628, - "step": 11052 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.487039420043672e-06, - "loss": 0.5898, - "step": 11053 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.485259637747853e-06, - "loss": 0.5364, - "step": 11054 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.483480035056141e-06, - "loss": 0.5016, - "step": 11055 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.48170061203933e-06, - "loss": 0.402, - "step": 11056 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.479921368768208e-06, - "loss": 0.4157, - "step": 11057 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4781423053135595e-06, - "loss": 0.4308, - "step": 11058 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4763634217461555e-06, - "loss": 0.5088, - "step": 11059 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4745847181367635e-06, - "loss": 0.4611, - "step": 11060 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.472806194556144e-06, - "loss": 0.459, - "step": 11061 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4710278510750456e-06, - "loss": 0.4617, - "step": 11062 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.469249687764222e-06, - "loss": 0.5119, - "step": 11063 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.467471704694408e-06, - "loss": 0.5048, - "step": 11064 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.465693901936337e-06, - "loss": 0.5331, - "step": 11065 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.463916279560728e-06, - "loss": 0.4485, - "step": 11066 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.462138837638295e-06, - "loss": 0.5079, - "step": 11067 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.460361576239757e-06, - "loss": 0.4998, - "step": 11068 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.458584495435813e-06, - "loss": 0.5072, - "step": 11069 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.456807595297156e-06, - "loss": 0.5211, - "step": 11070 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.455030875894477e-06, - "loss": 0.473, - "step": 11071 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.453254337298454e-06, - "loss": 0.4557, - "step": 11072 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4514779795797645e-06, - "loss": 0.4109, - "step": 11073 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.449701802809073e-06, - "loss": 0.4301, - "step": 11074 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4479258070570384e-06, - "loss": 0.4507, - "step": 11075 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.446149992394314e-06, - "loss": 0.4272, - "step": 11076 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.444374358891542e-06, - "loss": 0.409, - "step": 11077 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.442598906619361e-06, - "loss": 0.4984, - "step": 11078 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4408236356484045e-06, - "loss": 0.4099, - "step": 11079 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.4390485460492995e-06, - "loss": 0.4605, - "step": 11080 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.437273637892652e-06, - "loss": 0.4593, - "step": 11081 - }, - { - "epoch": 2.0, - "grad_norm": 0.0, - "learning_rate": 5.435498911249075e-06, - "loss": 0.4596, - "step": 11082 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.433724366189168e-06, - "loss": 0.478, - "step": 11083 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.43195000278353e-06, - "loss": 0.4486, - "step": 11084 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.430175821102749e-06, - "loss": 0.4992, - "step": 11085 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.428401821217401e-06, - "loss": 0.4073, - "step": 11086 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.42662800319806e-06, - "loss": 0.4698, - "step": 11087 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.424854367115293e-06, - "loss": 0.4316, - "step": 11088 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.423080913039656e-06, - "loss": 0.4048, - "step": 11089 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.421307641041701e-06, - "loss": 0.4454, - "step": 11090 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.4195345511919716e-06, - "loss": 0.475, - "step": 11091 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.417761643561005e-06, - "loss": 0.4242, - "step": 11092 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.415988918219329e-06, - "loss": 0.426, - "step": 11093 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.414216375237468e-06, - "loss": 0.3785, - "step": 11094 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.412444014685935e-06, - "loss": 0.4636, - "step": 11095 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.410671836635237e-06, - "loss": 0.4236, - "step": 11096 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.408899841155877e-06, - "loss": 0.3425, - "step": 11097 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.4071280283183445e-06, - "loss": 0.4596, - "step": 11098 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.405356398193129e-06, - "loss": 0.4785, - "step": 11099 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.403584950850702e-06, - "loss": 0.5688, - "step": 11100 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.401813686361544e-06, - "loss": 0.4554, - "step": 11101 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.400042604796114e-06, - "loss": 0.4861, - "step": 11102 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.398271706224875e-06, - "loss": 0.3952, - "step": 11103 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.396500990718266e-06, - "loss": 0.466, - "step": 11104 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.394730458346728e-06, - "loss": 0.4546, - "step": 11105 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.392960109180707e-06, - "loss": 0.4004, - "step": 11106 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.391189943290625e-06, - "loss": 0.4574, - "step": 11107 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3894199607469035e-06, - "loss": 0.4931, - "step": 11108 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.387650161619953e-06, - "loss": 0.406, - "step": 11109 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.385880545980181e-06, - "loss": 0.4499, - "step": 11110 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.384111113897985e-06, - "loss": 0.5039, - "step": 11111 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.382341865443756e-06, - "loss": 0.475, - "step": 11112 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.38057280068788e-06, - "loss": 0.4415, - "step": 11113 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3788039197007304e-06, - "loss": 0.4106, - "step": 11114 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.377035222552677e-06, - "loss": 0.4589, - "step": 11115 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3752667093140776e-06, - "loss": 0.4187, - "step": 11116 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.373498380055295e-06, - "loss": 0.4322, - "step": 11117 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.371730234846677e-06, - "loss": 0.3912, - "step": 11118 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.369962273758555e-06, - "loss": 0.4663, - "step": 11119 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3681944968612665e-06, - "loss": 0.4751, - "step": 11120 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3664269042251305e-06, - "loss": 0.4515, - "step": 11121 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.364659495920475e-06, - "loss": 0.4532, - "step": 11122 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.362892272017604e-06, - "loss": 0.5543, - "step": 11123 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.361125232586824e-06, - "loss": 0.4414, - "step": 11124 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.359358377698431e-06, - "loss": 0.4041, - "step": 11125 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.357591707422705e-06, - "loss": 0.4366, - "step": 11126 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.355825221829938e-06, - "loss": 0.4563, - "step": 11127 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3540589209903985e-06, - "loss": 0.4356, - "step": 11128 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.352292804974354e-06, - "loss": 0.4511, - "step": 11129 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3505268738520646e-06, - "loss": 0.3919, - "step": 11130 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.34876112769378e-06, - "loss": 0.4759, - "step": 11131 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.346995566569747e-06, - "loss": 0.5211, - "step": 11132 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.345230190550201e-06, - "loss": 0.4092, - "step": 11133 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.343464999705372e-06, - "loss": 0.4246, - "step": 11134 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.3416999941054825e-06, - "loss": 0.4103, - "step": 11135 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.339935173820747e-06, - "loss": 0.4271, - "step": 11136 - }, - { - "epoch": 2.01, - "grad_norm": 0.0, - "learning_rate": 5.338170538921373e-06, - "loss": 0.5028, - "step": 11137 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.336406089477558e-06, - "loss": 0.4127, - "step": 11138 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.334641825559503e-06, - "loss": 0.4803, - "step": 11139 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.332877747237386e-06, - "loss": 0.3397, - "step": 11140 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.331113854581393e-06, - "loss": 0.5295, - "step": 11141 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.329350147661685e-06, - "loss": 0.462, - "step": 11142 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.327586626548424e-06, - "loss": 0.5106, - "step": 11143 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.3258232913117765e-06, - "loss": 0.4781, - "step": 11144 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.324060142021885e-06, - "loss": 0.3897, - "step": 11145 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.322297178748892e-06, - "loss": 0.45, - "step": 11146 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.320534401562931e-06, - "loss": 0.4421, - "step": 11147 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.318771810534128e-06, - "loss": 0.522, - "step": 11148 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.317009405732601e-06, - "loss": 0.4682, - "step": 11149 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.315247187228463e-06, - "loss": 0.4838, - "step": 11150 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.3134851550918155e-06, - "loss": 0.4473, - "step": 11151 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.311723309392759e-06, - "loss": 0.5054, - "step": 11152 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.30996165020138e-06, - "loss": 0.4853, - "step": 11153 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.3082001775877565e-06, - "loss": 0.5453, - "step": 11154 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.306438891621972e-06, - "loss": 0.395, - "step": 11155 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.304677792374093e-06, - "loss": 0.4549, - "step": 11156 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.302916879914171e-06, - "loss": 0.4709, - "step": 11157 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.301156154312261e-06, - "loss": 0.418, - "step": 11158 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.2993956156384065e-06, - "loss": 0.4593, - "step": 11159 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.297635263962651e-06, - "loss": 0.4372, - "step": 11160 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.295875099355019e-06, - "loss": 0.4332, - "step": 11161 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.294115121885535e-06, - "loss": 0.4915, - "step": 11162 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.292355331624215e-06, - "loss": 0.4806, - "step": 11163 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.2905957286410595e-06, - "loss": 0.4156, - "step": 11164 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.288836313006076e-06, - "loss": 0.4542, - "step": 11165 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.2870770847892535e-06, - "loss": 0.4314, - "step": 11166 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.285318044060581e-06, - "loss": 0.4804, - "step": 11167 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.283559190890031e-06, - "loss": 0.4077, - "step": 11168 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.2818005253475775e-06, - "loss": 0.3637, - "step": 11169 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.280042047503177e-06, - "loss": 0.4345, - "step": 11170 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.278283757426799e-06, - "loss": 0.4103, - "step": 11171 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.276525655188376e-06, - "loss": 0.4427, - "step": 11172 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.274767740857856e-06, - "loss": 0.4041, - "step": 11173 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.273010014505169e-06, - "loss": 0.4619, - "step": 11174 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.271252476200237e-06, - "loss": 0.4665, - "step": 11175 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.269495126012987e-06, - "loss": 0.4322, - "step": 11176 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.267737964013324e-06, - "loss": 0.451, - "step": 11177 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.265980990271156e-06, - "loss": 0.4471, - "step": 11178 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.264224204856372e-06, - "loss": 0.4198, - "step": 11179 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.262467607838859e-06, - "loss": 0.4517, - "step": 11180 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.260711199288498e-06, - "loss": 0.3737, - "step": 11181 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.258954979275168e-06, - "loss": 0.4784, - "step": 11182 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.257198947868731e-06, - "loss": 0.4204, - "step": 11183 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.255443105139044e-06, - "loss": 0.4751, - "step": 11184 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.253687451155959e-06, - "loss": 0.4685, - "step": 11185 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.251931985989317e-06, - "loss": 0.4494, - "step": 11186 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.250176709708955e-06, - "loss": 0.4515, - "step": 11187 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.248421622384701e-06, - "loss": 0.4466, - "step": 11188 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.246666724086374e-06, - "loss": 0.4979, - "step": 11189 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.2449120148837875e-06, - "loss": 0.4276, - "step": 11190 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.243157494846748e-06, - "loss": 0.4821, - "step": 11191 - }, - { - "epoch": 2.02, - "grad_norm": 0.0, - "learning_rate": 5.241403164045047e-06, - "loss": 0.4743, - "step": 11192 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.239649022548485e-06, - "loss": 0.4297, - "step": 11193 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.237895070426844e-06, - "loss": 0.4535, - "step": 11194 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.23614130774989e-06, - "loss": 0.4369, - "step": 11195 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.234387734587396e-06, - "loss": 0.4551, - "step": 11196 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.232634351009119e-06, - "loss": 0.4101, - "step": 11197 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.230881157084817e-06, - "loss": 0.4346, - "step": 11198 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.229128152884233e-06, - "loss": 0.4783, - "step": 11199 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.227375338477105e-06, - "loss": 0.5262, - "step": 11200 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.225622713933165e-06, - "loss": 0.4164, - "step": 11201 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.223870279322125e-06, - "loss": 0.4414, - "step": 11202 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.22211803471371e-06, - "loss": 0.4786, - "step": 11203 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.220365980177626e-06, - "loss": 0.3492, - "step": 11204 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.2186141157835715e-06, - "loss": 0.4645, - "step": 11205 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.216862441601236e-06, - "loss": 0.3781, - "step": 11206 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.215110957700309e-06, - "loss": 0.4725, - "step": 11207 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.2133596641504595e-06, - "loss": 0.3815, - "step": 11208 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.211608561021372e-06, - "loss": 0.4282, - "step": 11209 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.209857648382693e-06, - "loss": 0.4333, - "step": 11210 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.208106926304084e-06, - "loss": 0.4018, - "step": 11211 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.2063563948551905e-06, - "loss": 0.4867, - "step": 11212 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.204606054105647e-06, - "loss": 0.4988, - "step": 11213 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.202855904125095e-06, - "loss": 0.4821, - "step": 11214 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.201105944983152e-06, - "loss": 0.4543, - "step": 11215 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1993561767494395e-06, - "loss": 0.4433, - "step": 11216 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.197606599493558e-06, - "loss": 0.4436, - "step": 11217 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.195857213285109e-06, - "loss": 0.4656, - "step": 11218 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.194108018193695e-06, - "loss": 0.4144, - "step": 11219 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.192359014288897e-06, - "loss": 0.4768, - "step": 11220 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.190610201640292e-06, - "loss": 0.4127, - "step": 11221 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.188861580317452e-06, - "loss": 0.4784, - "step": 11222 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.187113150389942e-06, - "loss": 0.4362, - "step": 11223 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1853649119273145e-06, - "loss": 0.4864, - "step": 11224 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.183616864999119e-06, - "loss": 0.4282, - "step": 11225 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1818690096748955e-06, - "loss": 0.4296, - "step": 11226 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.180121346024176e-06, - "loss": 0.5752, - "step": 11227 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.178373874116488e-06, - "loss": 0.5217, - "step": 11228 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1766265940213455e-06, - "loss": 0.4457, - "step": 11229 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1748795058082565e-06, - "loss": 0.4434, - "step": 11230 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.173132609546736e-06, - "loss": 0.4853, - "step": 11231 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1713859053062636e-06, - "loss": 0.4749, - "step": 11232 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1696393931563315e-06, - "loss": 0.4981, - "step": 11233 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1678930731664215e-06, - "loss": 0.4631, - "step": 11234 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.166146945405996e-06, - "loss": 0.3964, - "step": 11235 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.164401009944532e-06, - "loss": 0.4214, - "step": 11236 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.16265526685148e-06, - "loss": 0.4464, - "step": 11237 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.160909716196285e-06, - "loss": 0.4785, - "step": 11238 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.159164358048399e-06, - "loss": 0.4833, - "step": 11239 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.157419192477238e-06, - "loss": 0.4699, - "step": 11240 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.15567421955224e-06, - "loss": 0.3999, - "step": 11241 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.153929439342823e-06, - "loss": 0.4362, - "step": 11242 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.1521848519183936e-06, - "loss": 0.4235, - "step": 11243 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.150440457348355e-06, - "loss": 0.4009, - "step": 11244 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.148696255702104e-06, - "loss": 0.4205, - "step": 11245 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.146952247049022e-06, - "loss": 0.4207, - "step": 11246 - }, - { - "epoch": 2.03, - "grad_norm": 0.0, - "learning_rate": 5.145208431458501e-06, - "loss": 0.4576, - "step": 11247 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.143464808999901e-06, - "loss": 0.4356, - "step": 11248 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.141721379742591e-06, - "loss": 0.4419, - "step": 11249 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.1399781437559286e-06, - "loss": 0.4359, - "step": 11250 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.138235101109255e-06, - "loss": 0.454, - "step": 11251 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.136492251871923e-06, - "loss": 0.4553, - "step": 11252 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.13474959611326e-06, - "loss": 0.5018, - "step": 11253 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.133007133902598e-06, - "loss": 0.4968, - "step": 11254 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.131264865309244e-06, - "loss": 0.4533, - "step": 11255 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.129522790402512e-06, - "loss": 0.4869, - "step": 11256 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.1277809092517115e-06, - "loss": 0.4338, - "step": 11257 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.126039221926131e-06, - "loss": 0.4922, - "step": 11258 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.1242977284950616e-06, - "loss": 0.5516, - "step": 11259 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.122556429027781e-06, - "loss": 0.4199, - "step": 11260 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.12081532359356e-06, - "loss": 0.4227, - "step": 11261 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.119074412261664e-06, - "loss": 0.5177, - "step": 11262 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.11733369510135e-06, - "loss": 0.4992, - "step": 11263 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.115593172181866e-06, - "loss": 0.4059, - "step": 11264 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.113852843572453e-06, - "loss": 0.4599, - "step": 11265 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.112112709342344e-06, - "loss": 0.4124, - "step": 11266 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.1103727695607655e-06, - "loss": 0.4572, - "step": 11267 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.108633024296929e-06, - "loss": 0.4339, - "step": 11268 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.10689347362006e-06, - "loss": 0.4396, - "step": 11269 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.105154117599347e-06, - "loss": 0.4077, - "step": 11270 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.103414956303987e-06, - "loss": 0.4698, - "step": 11271 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.10167598980317e-06, - "loss": 0.4907, - "step": 11272 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.099937218166069e-06, - "loss": 0.4773, - "step": 11273 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.098198641461862e-06, - "loss": 0.4714, - "step": 11274 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0964602597597125e-06, - "loss": 0.4358, - "step": 11275 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.094722073128773e-06, - "loss": 0.4599, - "step": 11276 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.092984081638197e-06, - "loss": 0.4259, - "step": 11277 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.091246285357112e-06, - "loss": 0.4545, - "step": 11278 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.089508684354664e-06, - "loss": 0.4669, - "step": 11279 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.087771278699971e-06, - "loss": 0.4866, - "step": 11280 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0860340684621535e-06, - "loss": 0.4392, - "step": 11281 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0842970537103176e-06, - "loss": 0.4924, - "step": 11282 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.082560234513566e-06, - "loss": 0.4166, - "step": 11283 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.080823610940992e-06, - "loss": 0.4372, - "step": 11284 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.079087183061684e-06, - "loss": 0.5465, - "step": 11285 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.077350950944715e-06, - "loss": 0.4251, - "step": 11286 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.075614914659159e-06, - "loss": 0.4195, - "step": 11287 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0738790742740795e-06, - "loss": 0.4479, - "step": 11288 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.072143429858523e-06, - "loss": 0.54, - "step": 11289 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.070407981481547e-06, - "loss": 0.5543, - "step": 11290 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.068672729212188e-06, - "loss": 0.4326, - "step": 11291 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0669376731194786e-06, - "loss": 0.4575, - "step": 11292 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.065202813272435e-06, - "loss": 0.3865, - "step": 11293 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.063468149740074e-06, - "loss": 0.5055, - "step": 11294 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.06173368259141e-06, - "loss": 0.477, - "step": 11295 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0599994118954395e-06, - "loss": 0.4652, - "step": 11296 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0582653377211565e-06, - "loss": 0.4484, - "step": 11297 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.056531460137544e-06, - "loss": 0.4921, - "step": 11298 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0547977792135785e-06, - "loss": 0.3681, - "step": 11299 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.053064295018227e-06, - "loss": 0.373, - "step": 11300 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.051331007620455e-06, - "loss": 0.4501, - "step": 11301 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.049597917089211e-06, - "loss": 0.3947, - "step": 11302 - }, - { - "epoch": 2.04, - "grad_norm": 0.0, - "learning_rate": 5.0478650234934435e-06, - "loss": 0.5102, - "step": 11303 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.046132326902089e-06, - "loss": 0.4653, - "step": 11304 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.0443998273840725e-06, - "loss": 0.4453, - "step": 11305 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.0426675250083245e-06, - "loss": 0.4389, - "step": 11306 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.040935419843758e-06, - "loss": 0.42, - "step": 11307 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.039203511959272e-06, - "loss": 0.4344, - "step": 11308 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.03747180142377e-06, - "loss": 0.4237, - "step": 11309 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.035740288306136e-06, - "loss": 0.3741, - "step": 11310 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.034008972675262e-06, - "loss": 0.5025, - "step": 11311 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.032277854600017e-06, - "loss": 0.4595, - "step": 11312 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.030546934149271e-06, - "loss": 0.5298, - "step": 11313 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.02881621139188e-06, - "loss": 0.4499, - "step": 11314 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.027085686396696e-06, - "loss": 0.4332, - "step": 11315 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.025355359232564e-06, - "loss": 0.4901, - "step": 11316 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.023625229968317e-06, - "loss": 0.4242, - "step": 11317 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.021895298672781e-06, - "loss": 0.4709, - "step": 11318 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.02016556541478e-06, - "loss": 0.4292, - "step": 11319 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.018436030263125e-06, - "loss": 0.536, - "step": 11320 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.016706693286617e-06, - "loss": 0.4725, - "step": 11321 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.014977554554054e-06, - "loss": 0.4368, - "step": 11322 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.013248614134223e-06, - "loss": 0.4757, - "step": 11323 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.0115198720959065e-06, - "loss": 0.4354, - "step": 11324 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.009791328507874e-06, - "loss": 0.3741, - "step": 11325 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.0080629834388914e-06, - "loss": 0.4414, - "step": 11326 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.006334836957712e-06, - "loss": 0.4662, - "step": 11327 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.00460688913309e-06, - "loss": 0.4408, - "step": 11328 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.002879140033764e-06, - "loss": 0.411, - "step": 11329 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 5.0011515897284705e-06, - "loss": 0.4829, - "step": 11330 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.999424238285926e-06, - "loss": 0.4826, - "step": 11331 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.997697085774848e-06, - "loss": 0.4304, - "step": 11332 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.995970132263953e-06, - "loss": 0.4785, - "step": 11333 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.994243377821939e-06, - "loss": 0.4864, - "step": 11334 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.992516822517498e-06, - "loss": 0.4382, - "step": 11335 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.9907904664193165e-06, - "loss": 0.4657, - "step": 11336 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.989064309596073e-06, - "loss": 0.4352, - "step": 11337 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.987338352116433e-06, - "loss": 0.408, - "step": 11338 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.985612594049062e-06, - "loss": 0.3697, - "step": 11339 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.983887035462612e-06, - "loss": 0.4035, - "step": 11340 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.982161676425727e-06, - "loss": 0.3927, - "step": 11341 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.980436517007049e-06, - "loss": 0.4215, - "step": 11342 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.978711557275201e-06, - "loss": 0.4207, - "step": 11343 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.9769867972988114e-06, - "loss": 0.4288, - "step": 11344 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.9752622371464975e-06, - "loss": 0.4399, - "step": 11345 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.973537876886856e-06, - "loss": 0.4337, - "step": 11346 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.971813716588487e-06, - "loss": 0.4004, - "step": 11347 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.970089756319979e-06, - "loss": 0.5396, - "step": 11348 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.968365996149922e-06, - "loss": 0.4349, - "step": 11349 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.966642436146884e-06, - "loss": 0.4399, - "step": 11350 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.964919076379433e-06, - "loss": 0.4435, - "step": 11351 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.963195916916127e-06, - "loss": 0.4097, - "step": 11352 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.961472957825515e-06, - "loss": 0.4613, - "step": 11353 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.959750199176141e-06, - "loss": 0.4995, - "step": 11354 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.958027641036539e-06, - "loss": 0.4507, - "step": 11355 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.956305283475233e-06, - "loss": 0.4586, - "step": 11356 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.954583126560746e-06, - "loss": 0.4809, - "step": 11357 - }, - { - "epoch": 2.05, - "grad_norm": 0.0, - "learning_rate": 4.952861170361584e-06, - "loss": 0.4884, - "step": 11358 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.95113941494625e-06, - "loss": 0.4271, - "step": 11359 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.94941786038324e-06, - "loss": 0.4759, - "step": 11360 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.94769650674104e-06, - "loss": 0.4844, - "step": 11361 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.945975354088127e-06, - "loss": 0.4937, - "step": 11362 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.944254402492973e-06, - "loss": 0.4615, - "step": 11363 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.94253365202404e-06, - "loss": 0.4784, - "step": 11364 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.940813102749779e-06, - "loss": 0.4249, - "step": 11365 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.939092754738643e-06, - "loss": 0.4348, - "step": 11366 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.937372608059067e-06, - "loss": 0.4961, - "step": 11367 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.935652662779484e-06, - "loss": 0.4641, - "step": 11368 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.933932918968312e-06, - "loss": 0.4341, - "step": 11369 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.932213376693962e-06, - "loss": 0.4853, - "step": 11370 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.93049403602485e-06, - "loss": 0.4049, - "step": 11371 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.928774897029369e-06, - "loss": 0.4255, - "step": 11372 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.927055959775911e-06, - "loss": 0.4515, - "step": 11373 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.9253372243328564e-06, - "loss": 0.4258, - "step": 11374 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.923618690768581e-06, - "loss": 0.439, - "step": 11375 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.92190035915145e-06, - "loss": 0.4739, - "step": 11376 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.920182229549822e-06, - "loss": 0.4857, - "step": 11377 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.918464302032047e-06, - "loss": 0.4938, - "step": 11378 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.916746576666468e-06, - "loss": 0.4963, - "step": 11379 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.915029053521419e-06, - "loss": 0.4002, - "step": 11380 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.9133117326652205e-06, - "loss": 0.5039, - "step": 11381 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.911594614166199e-06, - "loss": 0.4101, - "step": 11382 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.909877698092663e-06, - "loss": 0.4197, - "step": 11383 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.9081609845129104e-06, - "loss": 0.3872, - "step": 11384 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.906444473495236e-06, - "loss": 0.4227, - "step": 11385 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.9047281651079215e-06, - "loss": 0.4632, - "step": 11386 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.903012059419254e-06, - "loss": 0.3735, - "step": 11387 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.901296156497498e-06, - "loss": 0.3998, - "step": 11388 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.899580456410916e-06, - "loss": 0.3655, - "step": 11389 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.897864959227765e-06, - "loss": 0.4203, - "step": 11390 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.896149665016278e-06, - "loss": 0.4036, - "step": 11391 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.894434573844705e-06, - "loss": 0.4728, - "step": 11392 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.892719685781271e-06, - "loss": 0.3825, - "step": 11393 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.891005000894196e-06, - "loss": 0.4845, - "step": 11394 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.889290519251695e-06, - "loss": 0.42, - "step": 11395 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.887576240921973e-06, - "loss": 0.4218, - "step": 11396 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.885862165973222e-06, - "loss": 0.4441, - "step": 11397 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.884148294473642e-06, - "loss": 0.4985, - "step": 11398 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.882434626491404e-06, - "loss": 0.5103, - "step": 11399 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.880721162094682e-06, - "loss": 0.4531, - "step": 11400 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.879007901351643e-06, - "loss": 0.4823, - "step": 11401 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.877294844330438e-06, - "loss": 0.4613, - "step": 11402 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.875581991099224e-06, - "loss": 0.5061, - "step": 11403 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.873869341726137e-06, - "loss": 0.501, - "step": 11404 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.87215689627931e-06, - "loss": 0.4765, - "step": 11405 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.870444654826869e-06, - "loss": 0.4509, - "step": 11406 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.86873261743692e-06, - "loss": 0.3815, - "step": 11407 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.867020784177581e-06, - "loss": 0.389, - "step": 11408 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.865309155116948e-06, - "loss": 0.4043, - "step": 11409 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.863597730323114e-06, - "loss": 0.4463, - "step": 11410 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.861886509864161e-06, - "loss": 0.4632, - "step": 11411 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.860175493808165e-06, - "loss": 0.441, - "step": 11412 - }, - { - "epoch": 2.06, - "grad_norm": 0.0, - "learning_rate": 4.858464682223193e-06, - "loss": 0.399, - "step": 11413 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.856754075177304e-06, - "loss": 0.395, - "step": 11414 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.855043672738548e-06, - "loss": 0.5132, - "step": 11415 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.853333474974969e-06, - "loss": 0.437, - "step": 11416 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8516234819546e-06, - "loss": 0.4735, - "step": 11417 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.849913693745471e-06, - "loss": 0.4589, - "step": 11418 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.848204110415592e-06, - "loss": 0.4525, - "step": 11419 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.846494732032983e-06, - "loss": 0.4346, - "step": 11420 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.844785558665645e-06, - "loss": 0.4446, - "step": 11421 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.843076590381567e-06, - "loss": 0.4103, - "step": 11422 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8413678272487365e-06, - "loss": 0.4904, - "step": 11423 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.839659269335126e-06, - "loss": 0.4652, - "step": 11424 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8379509167087145e-06, - "loss": 0.4546, - "step": 11425 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.836242769437458e-06, - "loss": 0.4339, - "step": 11426 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.83453482758931e-06, - "loss": 0.4888, - "step": 11427 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.832827091232221e-06, - "loss": 0.4133, - "step": 11428 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.831119560434112e-06, - "loss": 0.4022, - "step": 11429 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.829412235262927e-06, - "loss": 0.4139, - "step": 11430 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.827705115786581e-06, - "loss": 0.4535, - "step": 11431 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8259982020729855e-06, - "loss": 0.4435, - "step": 11432 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.824291494190047e-06, - "loss": 0.4424, - "step": 11433 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.822584992205657e-06, - "loss": 0.4197, - "step": 11434 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.820878696187702e-06, - "loss": 0.4248, - "step": 11435 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8191726062040734e-06, - "loss": 0.4823, - "step": 11436 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.817466722322629e-06, - "loss": 0.3983, - "step": 11437 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.815761044611237e-06, - "loss": 0.4309, - "step": 11438 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.814055573137751e-06, - "loss": 0.465, - "step": 11439 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.812350307970016e-06, - "loss": 0.4404, - "step": 11440 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.810645249175876e-06, - "loss": 0.4201, - "step": 11441 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.808940396823157e-06, - "loss": 0.4492, - "step": 11442 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8072357509796866e-06, - "loss": 0.4617, - "step": 11443 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.805531311713269e-06, - "loss": 0.4012, - "step": 11444 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.803827079091712e-06, - "loss": 0.4843, - "step": 11445 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8021230531828175e-06, - "loss": 0.4222, - "step": 11446 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.8004192340543735e-06, - "loss": 0.4616, - "step": 11447 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.7987156217741594e-06, - "loss": 0.429, - "step": 11448 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.797012216409947e-06, - "loss": 0.3849, - "step": 11449 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.795309018029504e-06, - "loss": 0.429, - "step": 11450 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.793606026700583e-06, - "loss": 0.5093, - "step": 11451 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.791903242490934e-06, - "loss": 0.459, - "step": 11452 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.790200665468295e-06, - "loss": 0.4251, - "step": 11453 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.7884982957004e-06, - "loss": 0.4502, - "step": 11454 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.7867961332549695e-06, - "loss": 0.4883, - "step": 11455 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.785094178199719e-06, - "loss": 0.4174, - "step": 11456 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.783392430602354e-06, - "loss": 0.4725, - "step": 11457 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.781690890530576e-06, - "loss": 0.4137, - "step": 11458 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.779989558052079e-06, - "loss": 0.4469, - "step": 11459 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.778288433234536e-06, - "loss": 0.4655, - "step": 11460 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.776587516145625e-06, - "loss": 0.3522, - "step": 11461 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.774886806853005e-06, - "loss": 0.4696, - "step": 11462 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.773186305424343e-06, - "loss": 0.4175, - "step": 11463 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.771486011927285e-06, - "loss": 0.4845, - "step": 11464 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.76978592642947e-06, - "loss": 0.4321, - "step": 11465 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.7680860489985345e-06, - "loss": 0.4545, - "step": 11466 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.766386379702092e-06, - "loss": 0.4793, - "step": 11467 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.764686918607767e-06, - "loss": 0.4541, - "step": 11468 - }, - { - "epoch": 2.07, - "grad_norm": 0.0, - "learning_rate": 4.762987665783166e-06, - "loss": 0.4471, - "step": 11469 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.761288621295888e-06, - "loss": 0.5871, - "step": 11470 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.759589785213522e-06, - "loss": 0.4615, - "step": 11471 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.757891157603651e-06, - "loss": 0.4853, - "step": 11472 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.756192738533847e-06, - "loss": 0.4396, - "step": 11473 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.754494528071687e-06, - "loss": 0.4614, - "step": 11474 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7527965262847165e-06, - "loss": 0.4705, - "step": 11475 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.751098733240489e-06, - "loss": 0.427, - "step": 11476 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.749401149006546e-06, - "loss": 0.4204, - "step": 11477 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.747703773650416e-06, - "loss": 0.4849, - "step": 11478 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7460066072396314e-06, - "loss": 0.5068, - "step": 11479 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.744309649841705e-06, - "loss": 0.4018, - "step": 11480 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.742612901524148e-06, - "loss": 0.4641, - "step": 11481 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.740916362354452e-06, - "loss": 0.4245, - "step": 11482 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.739220032400108e-06, - "loss": 0.4306, - "step": 11483 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7375239117286076e-06, - "loss": 0.4728, - "step": 11484 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7358280004074215e-06, - "loss": 0.4637, - "step": 11485 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.734132298504016e-06, - "loss": 0.4172, - "step": 11486 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.732436806085848e-06, - "loss": 0.5051, - "step": 11487 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.730741523220368e-06, - "loss": 0.4079, - "step": 11488 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7290464499750164e-06, - "loss": 0.4794, - "step": 11489 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7273515864172274e-06, - "loss": 0.4737, - "step": 11490 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.725656932614423e-06, - "loss": 0.414, - "step": 11491 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.723962488634023e-06, - "loss": 0.4819, - "step": 11492 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7222682545434325e-06, - "loss": 0.4171, - "step": 11493 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.7205742304100496e-06, - "loss": 0.4424, - "step": 11494 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.718880416301272e-06, - "loss": 0.4065, - "step": 11495 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.717186812284476e-06, - "loss": 0.4663, - "step": 11496 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.715493418427045e-06, - "loss": 0.3828, - "step": 11497 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.713800234796334e-06, - "loss": 0.4708, - "step": 11498 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.712107261459702e-06, - "loss": 0.4978, - "step": 11499 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.710414498484504e-06, - "loss": 0.4763, - "step": 11500 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.70872194593808e-06, - "loss": 0.4332, - "step": 11501 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.707029603887762e-06, - "loss": 0.4333, - "step": 11502 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.705337472400873e-06, - "loss": 0.4456, - "step": 11503 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.70364555154473e-06, - "loss": 0.4263, - "step": 11504 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.70195384138664e-06, - "loss": 0.4162, - "step": 11505 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.700262341993902e-06, - "loss": 0.4846, - "step": 11506 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.698571053433807e-06, - "loss": 0.4392, - "step": 11507 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.696879975773637e-06, - "loss": 0.3975, - "step": 11508 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.695189109080667e-06, - "loss": 0.4451, - "step": 11509 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.6934984534221615e-06, - "loss": 0.5006, - "step": 11510 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.691808008865374e-06, - "loss": 0.4379, - "step": 11511 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.690117775477565e-06, - "loss": 0.5275, - "step": 11512 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.688427753325964e-06, - "loss": 0.4371, - "step": 11513 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.6867379424778055e-06, - "loss": 0.4258, - "step": 11514 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.685048343000315e-06, - "loss": 0.4315, - "step": 11515 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.683358954960702e-06, - "loss": 0.4397, - "step": 11516 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.681669778426181e-06, - "loss": 0.4414, - "step": 11517 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.679980813463947e-06, - "loss": 0.4133, - "step": 11518 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.678292060141194e-06, - "loss": 0.4668, - "step": 11519 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.676603518525096e-06, - "loss": 0.414, - "step": 11520 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.674915188682826e-06, - "loss": 0.4944, - "step": 11521 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.6732270706815555e-06, - "loss": 0.4553, - "step": 11522 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.671539164588437e-06, - "loss": 0.3061, - "step": 11523 - }, - { - "epoch": 2.08, - "grad_norm": 0.0, - "learning_rate": 4.669851470470619e-06, - "loss": 0.4336, - "step": 11524 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.66816398839524e-06, - "loss": 0.4338, - "step": 11525 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.666476718429433e-06, - "loss": 0.4468, - "step": 11526 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.664789660640318e-06, - "loss": 0.4966, - "step": 11527 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.663102815095008e-06, - "loss": 0.3666, - "step": 11528 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.661416181860614e-06, - "loss": 0.3873, - "step": 11529 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.659729761004228e-06, - "loss": 0.5051, - "step": 11530 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.658043552592941e-06, - "loss": 0.4459, - "step": 11531 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.656357556693829e-06, - "loss": 0.4558, - "step": 11532 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.654671773373971e-06, - "loss": 0.4042, - "step": 11533 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.652986202700431e-06, - "loss": 0.4082, - "step": 11534 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.651300844740257e-06, - "loss": 0.5189, - "step": 11535 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.649615699560496e-06, - "loss": 0.4681, - "step": 11536 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.647930767228186e-06, - "loss": 0.4749, - "step": 11537 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.646246047810362e-06, - "loss": 0.4112, - "step": 11538 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.644561541374042e-06, - "loss": 0.496, - "step": 11539 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.642877247986237e-06, - "loss": 0.4031, - "step": 11540 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.641193167713954e-06, - "loss": 0.4459, - "step": 11541 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.639509300624187e-06, - "loss": 0.4116, - "step": 11542 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.637825646783922e-06, - "loss": 0.4329, - "step": 11543 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.6361422062601395e-06, - "loss": 0.4603, - "step": 11544 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.634458979119808e-06, - "loss": 0.4553, - "step": 11545 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.6327759654298894e-06, - "loss": 0.4873, - "step": 11546 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.631093165257339e-06, - "loss": 0.4377, - "step": 11547 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.629410578669099e-06, - "loss": 0.4809, - "step": 11548 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.627728205732104e-06, - "loss": 0.3962, - "step": 11549 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.626046046513291e-06, - "loss": 0.4634, - "step": 11550 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.624364101079567e-06, - "loss": 0.3997, - "step": 11551 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.622682369497849e-06, - "loss": 0.4709, - "step": 11552 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.621000851835039e-06, - "loss": 0.4545, - "step": 11553 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.619319548158027e-06, - "loss": 0.4469, - "step": 11554 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.617638458533703e-06, - "loss": 0.4465, - "step": 11555 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.615957583028942e-06, - "loss": 0.4971, - "step": 11556 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.614276921710615e-06, - "loss": 0.4961, - "step": 11557 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.612596474645576e-06, - "loss": 0.4924, - "step": 11558 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.610916241900674e-06, - "loss": 0.4774, - "step": 11559 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.609236223542759e-06, - "loss": 0.4872, - "step": 11560 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.607556419638662e-06, - "loss": 0.476, - "step": 11561 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.60587683025521e-06, - "loss": 0.5059, - "step": 11562 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.604197455459216e-06, - "loss": 0.409, - "step": 11563 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.602518295317492e-06, - "loss": 0.4667, - "step": 11564 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.600839349896836e-06, - "loss": 0.4556, - "step": 11565 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.59916061926404e-06, - "loss": 0.386, - "step": 11566 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.597482103485887e-06, - "loss": 0.4702, - "step": 11567 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.595803802629152e-06, - "loss": 0.4503, - "step": 11568 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.594125716760598e-06, - "loss": 0.426, - "step": 11569 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.592447845946981e-06, - "loss": 0.4884, - "step": 11570 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.590770190255055e-06, - "loss": 0.4042, - "step": 11571 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.589092749751563e-06, - "loss": 0.4188, - "step": 11572 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.587415524503225e-06, - "loss": 0.4462, - "step": 11573 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.585738514576771e-06, - "loss": 0.5118, - "step": 11574 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.58406172003891e-06, - "loss": 0.3944, - "step": 11575 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.5823851409563555e-06, - "loss": 0.382, - "step": 11576 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.580708777395801e-06, - "loss": 0.424, - "step": 11577 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.579032629423935e-06, - "loss": 0.3781, - "step": 11578 - }, - { - "epoch": 2.09, - "grad_norm": 0.0, - "learning_rate": 4.577356697107438e-06, - "loss": 0.4203, - "step": 11579 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.57568098051298e-06, - "loss": 0.4668, - "step": 11580 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.574005479707227e-06, - "loss": 0.3693, - "step": 11581 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5723301947568285e-06, - "loss": 0.5017, - "step": 11582 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.570655125728435e-06, - "loss": 0.4713, - "step": 11583 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.568980272688681e-06, - "loss": 0.4611, - "step": 11584 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.567305635704196e-06, - "loss": 0.4953, - "step": 11585 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.565631214841595e-06, - "loss": 0.3904, - "step": 11586 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.563957010167501e-06, - "loss": 0.48, - "step": 11587 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.562283021748507e-06, - "loss": 0.3846, - "step": 11588 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.56060924965121e-06, - "loss": 0.4603, - "step": 11589 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.558935693942195e-06, - "loss": 0.4745, - "step": 11590 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.557262354688035e-06, - "loss": 0.429, - "step": 11591 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.555589231955305e-06, - "loss": 0.3929, - "step": 11592 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5539163258105635e-06, - "loss": 0.4986, - "step": 11593 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.55224363632036e-06, - "loss": 0.4788, - "step": 11594 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.550571163551242e-06, - "loss": 0.4542, - "step": 11595 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.54889890756973e-06, - "loss": 0.4519, - "step": 11596 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.54722686844236e-06, - "loss": 0.4082, - "step": 11597 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.545555046235649e-06, - "loss": 0.4498, - "step": 11598 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.543883441016101e-06, - "loss": 0.4246, - "step": 11599 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.542212052850216e-06, - "loss": 0.4629, - "step": 11600 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.540540881804486e-06, - "loss": 0.4741, - "step": 11601 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.538869927945392e-06, - "loss": 0.4051, - "step": 11602 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5371991913394075e-06, - "loss": 0.4937, - "step": 11603 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.535528672052998e-06, - "loss": 0.405, - "step": 11604 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.533858370152619e-06, - "loss": 0.5296, - "step": 11605 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.532188285704717e-06, - "loss": 0.4229, - "step": 11606 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.530518418775734e-06, - "loss": 0.4773, - "step": 11607 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.528848769432093e-06, - "loss": 0.492, - "step": 11608 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.527179337740224e-06, - "loss": 0.4652, - "step": 11609 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.52551012376654e-06, - "loss": 0.4067, - "step": 11610 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.523841127577439e-06, - "loss": 0.5092, - "step": 11611 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.522172349239319e-06, - "loss": 0.4088, - "step": 11612 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.520503788818562e-06, - "loss": 0.4568, - "step": 11613 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.518835446381555e-06, - "loss": 0.4715, - "step": 11614 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.517167321994666e-06, - "loss": 0.5071, - "step": 11615 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.515499415724252e-06, - "loss": 0.478, - "step": 11616 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.513831727636666e-06, - "loss": 0.4586, - "step": 11617 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.512164257798252e-06, - "loss": 0.3842, - "step": 11618 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5104970062753465e-06, - "loss": 0.4449, - "step": 11619 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5088299731342735e-06, - "loss": 0.3798, - "step": 11620 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.507163158441351e-06, - "loss": 0.4019, - "step": 11621 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.505496562262888e-06, - "loss": 0.4179, - "step": 11622 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.503830184665184e-06, - "loss": 0.4787, - "step": 11623 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.5021640257145275e-06, - "loss": 0.4373, - "step": 11624 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.500498085477211e-06, - "loss": 0.4138, - "step": 11625 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.498832364019499e-06, - "loss": 0.442, - "step": 11626 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.497166861407659e-06, - "loss": 0.4394, - "step": 11627 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.495501577707948e-06, - "loss": 0.447, - "step": 11628 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.49383651298661e-06, - "loss": 0.5047, - "step": 11629 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.492171667309893e-06, - "loss": 0.4756, - "step": 11630 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.4905070407440225e-06, - "loss": 0.489, - "step": 11631 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.4888426333552194e-06, - "loss": 0.4106, - "step": 11632 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.487178445209701e-06, - "loss": 0.425, - "step": 11633 - }, - { - "epoch": 2.1, - "grad_norm": 0.0, - "learning_rate": 4.485514476373661e-06, - "loss": 0.3943, - "step": 11634 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.483850726913305e-06, - "loss": 0.466, - "step": 11635 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.482187196894818e-06, - "loss": 0.5182, - "step": 11636 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.480523886384376e-06, - "loss": 0.5098, - "step": 11637 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.47886079544815e-06, - "loss": 0.4901, - "step": 11638 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.477197924152299e-06, - "loss": 0.4923, - "step": 11639 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.475535272562976e-06, - "loss": 0.3894, - "step": 11640 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4738728407463235e-06, - "loss": 0.5087, - "step": 11641 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.472210628768477e-06, - "loss": 0.444, - "step": 11642 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.470548636695561e-06, - "loss": 0.4059, - "step": 11643 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4688868645936925e-06, - "loss": 0.4418, - "step": 11644 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4672253125289785e-06, - "loss": 0.3855, - "step": 11645 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4655639805675174e-06, - "loss": 0.4139, - "step": 11646 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.463902868775406e-06, - "loss": 0.491, - "step": 11647 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.462241977218726e-06, - "loss": 0.3936, - "step": 11648 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.460581305963543e-06, - "loss": 0.4218, - "step": 11649 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.458920855075925e-06, - "loss": 0.3423, - "step": 11650 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.457260624621924e-06, - "loss": 0.4468, - "step": 11651 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.455600614667594e-06, - "loss": 0.4477, - "step": 11652 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.45394082527897e-06, - "loss": 0.3982, - "step": 11653 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.452281256522082e-06, - "loss": 0.5143, - "step": 11654 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.450621908462949e-06, - "loss": 0.4468, - "step": 11655 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.448962781167582e-06, - "loss": 0.4265, - "step": 11656 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.447303874701986e-06, - "loss": 0.4781, - "step": 11657 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.445645189132154e-06, - "loss": 0.4722, - "step": 11658 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.443986724524071e-06, - "loss": 0.4474, - "step": 11659 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.442328480943714e-06, - "loss": 0.4595, - "step": 11660 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.440670458457051e-06, - "loss": 0.4445, - "step": 11661 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.439012657130037e-06, - "loss": 0.498, - "step": 11662 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4373550770286344e-06, - "loss": 0.3973, - "step": 11663 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.435697718218771e-06, - "loss": 0.4108, - "step": 11664 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.434040580766386e-06, - "loss": 0.5038, - "step": 11665 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.432383664737401e-06, - "loss": 0.3754, - "step": 11666 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.430726970197727e-06, - "loss": 0.5027, - "step": 11667 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.42907049721328e-06, - "loss": 0.4242, - "step": 11668 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.427414245849953e-06, - "loss": 0.4198, - "step": 11669 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.425758216173631e-06, - "loss": 0.4421, - "step": 11670 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.424102408250202e-06, - "loss": 0.3462, - "step": 11671 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.422446822145523e-06, - "loss": 0.429, - "step": 11672 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.420791457925468e-06, - "loss": 0.4388, - "step": 11673 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.419136315655887e-06, - "loss": 0.4733, - "step": 11674 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.417481395402624e-06, - "loss": 0.4701, - "step": 11675 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4158266972315135e-06, - "loss": 0.4426, - "step": 11676 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.414172221208384e-06, - "loss": 0.4306, - "step": 11677 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.412517967399053e-06, - "loss": 0.407, - "step": 11678 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.410863935869328e-06, - "loss": 0.4559, - "step": 11679 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4092101266850105e-06, - "loss": 0.4669, - "step": 11680 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.407556539911893e-06, - "loss": 0.4858, - "step": 11681 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4059031756157555e-06, - "loss": 0.4624, - "step": 11682 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4042500338623725e-06, - "loss": 0.5016, - "step": 11683 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.4025971147175075e-06, - "loss": 0.4719, - "step": 11684 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.400944418246921e-06, - "loss": 0.4537, - "step": 11685 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.39929194451636e-06, - "loss": 0.4373, - "step": 11686 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.397639693591557e-06, - "loss": 0.6009, - "step": 11687 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.395987665538244e-06, - "loss": 0.4197, - "step": 11688 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.394335860422139e-06, - "loss": 0.4529, - "step": 11689 - }, - { - "epoch": 2.11, - "grad_norm": 0.0, - "learning_rate": 4.392684278308959e-06, - "loss": 0.4613, - "step": 11690 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.391032919264405e-06, - "loss": 0.4794, - "step": 11691 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.389381783354169e-06, - "loss": 0.4832, - "step": 11692 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.387730870643942e-06, - "loss": 0.474, - "step": 11693 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.386080181199389e-06, - "loss": 0.2911, - "step": 11694 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.384429715086186e-06, - "loss": 0.5264, - "step": 11695 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.382779472369989e-06, - "loss": 0.4104, - "step": 11696 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.381129453116447e-06, - "loss": 0.451, - "step": 11697 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3794796573912025e-06, - "loss": 0.4512, - "step": 11698 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3778300852598845e-06, - "loss": 0.4064, - "step": 11699 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.376180736788113e-06, - "loss": 0.4397, - "step": 11700 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.374531612041514e-06, - "loss": 0.5078, - "step": 11701 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3728827110856805e-06, - "loss": 0.402, - "step": 11702 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.371234033986212e-06, - "loss": 0.4442, - "step": 11703 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.369585580808697e-06, - "loss": 0.4125, - "step": 11704 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.36793735161871e-06, - "loss": 0.4268, - "step": 11705 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3662893464818255e-06, - "loss": 0.4008, - "step": 11706 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.364641565463604e-06, - "loss": 0.4444, - "step": 11707 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.362994008629593e-06, - "loss": 0.3194, - "step": 11708 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.361346676045341e-06, - "loss": 0.4115, - "step": 11709 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.359699567776371e-06, - "loss": 0.4291, - "step": 11710 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3580526838882185e-06, - "loss": 0.3979, - "step": 11711 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.356406024446395e-06, - "loss": 0.5119, - "step": 11712 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.354759589516407e-06, - "loss": 0.414, - "step": 11713 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3531133791637546e-06, - "loss": 0.5303, - "step": 11714 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.351467393453925e-06, - "loss": 0.4074, - "step": 11715 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3498216324523986e-06, - "loss": 0.4156, - "step": 11716 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.348176096224647e-06, - "loss": 0.5346, - "step": 11717 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.346530784836134e-06, - "loss": 0.5, - "step": 11718 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.34488569835231e-06, - "loss": 0.429, - "step": 11719 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.343240836838622e-06, - "loss": 0.5232, - "step": 11720 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.341596200360499e-06, - "loss": 0.4335, - "step": 11721 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3399517889833764e-06, - "loss": 0.3542, - "step": 11722 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3383076027726676e-06, - "loss": 0.5085, - "step": 11723 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.336663641793786e-06, - "loss": 0.4441, - "step": 11724 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.335019906112123e-06, - "loss": 0.4245, - "step": 11725 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3333763957930675e-06, - "loss": 0.5, - "step": 11726 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.331733110902011e-06, - "loss": 0.5365, - "step": 11727 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.330090051504321e-06, - "loss": 0.4336, - "step": 11728 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.328447217665363e-06, - "loss": 0.4623, - "step": 11729 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.32680460945049e-06, - "loss": 0.4307, - "step": 11730 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.325162226925049e-06, - "loss": 0.4153, - "step": 11731 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.323520070154375e-06, - "loss": 0.3886, - "step": 11732 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.3218781392037975e-06, - "loss": 0.4385, - "step": 11733 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.320236434138635e-06, - "loss": 0.4216, - "step": 11734 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.318594955024196e-06, - "loss": 0.4608, - "step": 11735 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.316953701925784e-06, - "loss": 0.4996, - "step": 11736 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.315312674908689e-06, - "loss": 0.5688, - "step": 11737 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.31367187403819e-06, - "loss": 0.4227, - "step": 11738 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.312031299379573e-06, - "loss": 0.5031, - "step": 11739 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.310390950998091e-06, - "loss": 0.449, - "step": 11740 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.308750828959004e-06, - "loss": 0.4382, - "step": 11741 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.307110933327558e-06, - "loss": 0.5635, - "step": 11742 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.305471264168989e-06, - "loss": 0.3984, - "step": 11743 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.303831821548531e-06, - "loss": 0.4384, - "step": 11744 - }, - { - "epoch": 2.12, - "grad_norm": 0.0, - "learning_rate": 4.302192605531401e-06, - "loss": 0.454, - "step": 11745 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.3005536161828145e-06, - "loss": 0.3948, - "step": 11746 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.298914853567964e-06, - "loss": 0.4163, - "step": 11747 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.297276317752046e-06, - "loss": 0.4262, - "step": 11748 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.295638008800247e-06, - "loss": 0.4768, - "step": 11749 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.293999926777741e-06, - "loss": 0.4008, - "step": 11750 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.292362071749692e-06, - "loss": 0.5426, - "step": 11751 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2907244437812576e-06, - "loss": 0.4281, - "step": 11752 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.289087042937586e-06, - "loss": 0.4637, - "step": 11753 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2874498692838155e-06, - "loss": 0.4808, - "step": 11754 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.285812922885074e-06, - "loss": 0.4437, - "step": 11755 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2841762038064835e-06, - "loss": 0.4306, - "step": 11756 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.282539712113156e-06, - "loss": 0.4172, - "step": 11757 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.280903447870194e-06, - "loss": 0.3711, - "step": 11758 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2792674111426856e-06, - "loss": 0.4403, - "step": 11759 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.277631601995725e-06, - "loss": 0.4452, - "step": 11760 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.27599602049438e-06, - "loss": 0.4548, - "step": 11761 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.274360666703723e-06, - "loss": 0.4241, - "step": 11762 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.272725540688804e-06, - "loss": 0.437, - "step": 11763 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.271090642514672e-06, - "loss": 0.3468, - "step": 11764 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.26945597224637e-06, - "loss": 0.4064, - "step": 11765 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.267821529948928e-06, - "loss": 0.4756, - "step": 11766 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.266187315687364e-06, - "loss": 0.4525, - "step": 11767 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.264553329526691e-06, - "loss": 0.4595, - "step": 11768 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.262919571531914e-06, - "loss": 0.429, - "step": 11769 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.261286041768022e-06, - "loss": 0.4388, - "step": 11770 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.259652740300004e-06, - "loss": 0.4536, - "step": 11771 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.258019667192833e-06, - "loss": 0.462, - "step": 11772 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.256386822511477e-06, - "loss": 0.4662, - "step": 11773 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.254754206320891e-06, - "loss": 0.4809, - "step": 11774 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2531218186860265e-06, - "loss": 0.4628, - "step": 11775 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.251489659671817e-06, - "loss": 0.4363, - "step": 11776 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.249857729343203e-06, - "loss": 0.3635, - "step": 11777 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.248226027765097e-06, - "loss": 0.4307, - "step": 11778 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2465945550024115e-06, - "loss": 0.455, - "step": 11779 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.24496331112005e-06, - "loss": 0.4699, - "step": 11780 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.243332296182905e-06, - "loss": 0.4537, - "step": 11781 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.241701510255867e-06, - "loss": 0.4648, - "step": 11782 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2400709534038054e-06, - "loss": 0.4924, - "step": 11783 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.238440625691593e-06, - "loss": 0.3842, - "step": 11784 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.23681052718408e-06, - "loss": 0.4113, - "step": 11785 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.235180657946113e-06, - "loss": 0.4099, - "step": 11786 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2335510180425376e-06, - "loss": 0.4236, - "step": 11787 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.231921607538183e-06, - "loss": 0.4595, - "step": 11788 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.230292426497866e-06, - "loss": 0.4725, - "step": 11789 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.228663474986402e-06, - "loss": 0.435, - "step": 11790 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2270347530685916e-06, - "loss": 0.4887, - "step": 11791 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.225406260809229e-06, - "loss": 0.4652, - "step": 11792 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.223777998273095e-06, - "loss": 0.4004, - "step": 11793 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2221499655249695e-06, - "loss": 0.3495, - "step": 11794 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.220522162629617e-06, - "loss": 0.513, - "step": 11795 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.218894589651793e-06, - "loss": 0.3978, - "step": 11796 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.2172672466562416e-06, - "loss": 0.4559, - "step": 11797 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.21564013370771e-06, - "loss": 0.4794, - "step": 11798 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.214013250870926e-06, - "loss": 0.4134, - "step": 11799 - }, - { - "epoch": 2.13, - "grad_norm": 0.0, - "learning_rate": 4.212386598210604e-06, - "loss": 0.4016, - "step": 11800 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.210760175791456e-06, - "loss": 0.4105, - "step": 11801 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.209133983678184e-06, - "loss": 0.454, - "step": 11802 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.207508021935486e-06, - "loss": 0.4962, - "step": 11803 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.205882290628041e-06, - "loss": 0.4526, - "step": 11804 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.204256789820525e-06, - "loss": 0.5128, - "step": 11805 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.202631519577601e-06, - "loss": 0.4078, - "step": 11806 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.201006479963928e-06, - "loss": 0.402, - "step": 11807 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1993816710441525e-06, - "loss": 0.4769, - "step": 11808 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1977570928829095e-06, - "loss": 0.4223, - "step": 11809 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.196132745544829e-06, - "loss": 0.4082, - "step": 11810 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1945086290945315e-06, - "loss": 0.3924, - "step": 11811 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.192884743596626e-06, - "loss": 0.4347, - "step": 11812 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.191261089115709e-06, - "loss": 0.4682, - "step": 11813 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.189637665716382e-06, - "loss": 0.429, - "step": 11814 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.188014473463224e-06, - "loss": 0.4556, - "step": 11815 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.186391512420804e-06, - "loss": 0.51, - "step": 11816 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.18476878265369e-06, - "loss": 0.4695, - "step": 11817 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.183146284226432e-06, - "loss": 0.4507, - "step": 11818 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.181524017203582e-06, - "loss": 0.4231, - "step": 11819 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.179901981649674e-06, - "loss": 0.4409, - "step": 11820 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.178280177629237e-06, - "loss": 0.4651, - "step": 11821 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.17665860520679e-06, - "loss": 0.4415, - "step": 11822 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.175037264446833e-06, - "loss": 0.4122, - "step": 11823 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.173416155413876e-06, - "loss": 0.4482, - "step": 11824 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1717952781724056e-06, - "loss": 0.4986, - "step": 11825 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.170174632786903e-06, - "loss": 0.4037, - "step": 11826 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.168554219321842e-06, - "loss": 0.4244, - "step": 11827 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.166934037841683e-06, - "loss": 0.4663, - "step": 11828 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.16531408841088e-06, - "loss": 0.4598, - "step": 11829 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.163694371093878e-06, - "loss": 0.5306, - "step": 11830 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.162074885955113e-06, - "loss": 0.4242, - "step": 11831 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.16045563305901e-06, - "loss": 0.4329, - "step": 11832 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.158836612469984e-06, - "loss": 0.494, - "step": 11833 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.157217824252446e-06, - "loss": 0.4473, - "step": 11834 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.155599268470788e-06, - "loss": 0.4742, - "step": 11835 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.153980945189406e-06, - "loss": 0.5195, - "step": 11836 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.152362854472681e-06, - "loss": 0.4504, - "step": 11837 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.150744996384976e-06, - "loss": 0.4146, - "step": 11838 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.149127370990654e-06, - "loss": 0.4126, - "step": 11839 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.147509978354065e-06, - "loss": 0.4994, - "step": 11840 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1458928185395595e-06, - "loss": 0.4547, - "step": 11841 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.144275891611466e-06, - "loss": 0.5115, - "step": 11842 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1426591976341095e-06, - "loss": 0.4435, - "step": 11843 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.141042736671803e-06, - "loss": 0.4301, - "step": 11844 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.139426508788854e-06, - "loss": 0.4117, - "step": 11845 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1378105140495584e-06, - "loss": 0.4419, - "step": 11846 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.136194752518202e-06, - "loss": 0.3898, - "step": 11847 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.134579224259064e-06, - "loss": 0.4522, - "step": 11848 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.1329639293364135e-06, - "loss": 0.4231, - "step": 11849 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.131348867814509e-06, - "loss": 0.4552, - "step": 11850 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.129734039757595e-06, - "loss": 0.4712, - "step": 11851 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.128119445229924e-06, - "loss": 0.406, - "step": 11852 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.126505084295719e-06, - "loss": 0.4512, - "step": 11853 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.124890957019202e-06, - "loss": 0.4963, - "step": 11854 - }, - { - "epoch": 2.14, - "grad_norm": 0.0, - "learning_rate": 4.123277063464588e-06, - "loss": 0.4372, - "step": 11855 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.121663403696077e-06, - "loss": 0.4183, - "step": 11856 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.120049977777868e-06, - "loss": 0.4086, - "step": 11857 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.118436785774145e-06, - "loss": 0.5226, - "step": 11858 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.1168238277490815e-06, - "loss": 0.4221, - "step": 11859 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.115211103766849e-06, - "loss": 0.45, - "step": 11860 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.113598613891592e-06, - "loss": 0.3431, - "step": 11861 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.11198635818747e-06, - "loss": 0.445, - "step": 11862 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.1103743367186165e-06, - "loss": 0.4876, - "step": 11863 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.108762549549163e-06, - "loss": 0.4463, - "step": 11864 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.1071509967432265e-06, - "loss": 0.4244, - "step": 11865 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.105539678364918e-06, - "loss": 0.4554, - "step": 11866 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.103928594478341e-06, - "loss": 0.4441, - "step": 11867 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.102317745147583e-06, - "loss": 0.5187, - "step": 11868 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.100707130436728e-06, - "loss": 0.4868, - "step": 11869 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.09909675040985e-06, - "loss": 0.4565, - "step": 11870 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.097486605131013e-06, - "loss": 0.4522, - "step": 11871 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.095876694664269e-06, - "loss": 0.3636, - "step": 11872 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.094267019073662e-06, - "loss": 0.4494, - "step": 11873 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.092657578423233e-06, - "loss": 0.4836, - "step": 11874 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.0910483727770104e-06, - "loss": 0.4356, - "step": 11875 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.089439402199e-06, - "loss": 0.4694, - "step": 11876 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.087830666753217e-06, - "loss": 0.4772, - "step": 11877 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.086222166503655e-06, - "loss": 0.4684, - "step": 11878 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.084613901514309e-06, - "loss": 0.3784, - "step": 11879 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.083005871849156e-06, - "loss": 0.4641, - "step": 11880 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.081398077572166e-06, - "loss": 0.4211, - "step": 11881 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.079790518747298e-06, - "loss": 0.4732, - "step": 11882 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.078183195438508e-06, - "loss": 0.4232, - "step": 11883 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.076576107709732e-06, - "loss": 0.4628, - "step": 11884 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.074969255624908e-06, - "loss": 0.4018, - "step": 11885 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.0733626392479565e-06, - "loss": 0.4628, - "step": 11886 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.071756258642791e-06, - "loss": 0.429, - "step": 11887 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.070150113873318e-06, - "loss": 0.5046, - "step": 11888 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.068544205003428e-06, - "loss": 0.4552, - "step": 11889 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.066938532097019e-06, - "loss": 0.4139, - "step": 11890 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.0653330952179544e-06, - "loss": 0.4475, - "step": 11891 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.063727894430106e-06, - "loss": 0.4479, - "step": 11892 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.062122929797332e-06, - "loss": 0.4592, - "step": 11893 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.060518201383476e-06, - "loss": 0.4224, - "step": 11894 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.058913709252384e-06, - "loss": 0.464, - "step": 11895 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.057309453467884e-06, - "loss": 0.4798, - "step": 11896 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.055705434093793e-06, - "loss": 0.4584, - "step": 11897 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.054101651193926e-06, - "loss": 0.4497, - "step": 11898 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.052498104832074e-06, - "loss": 0.4767, - "step": 11899 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.050894795072041e-06, - "loss": 0.46, - "step": 11900 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.049291721977604e-06, - "loss": 0.4581, - "step": 11901 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.0476888856125366e-06, - "loss": 0.3887, - "step": 11902 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.046086286040601e-06, - "loss": 0.3881, - "step": 11903 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.044483923325553e-06, - "loss": 0.4413, - "step": 11904 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.042881797531138e-06, - "loss": 0.4381, - "step": 11905 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.041279908721089e-06, - "loss": 0.5002, - "step": 11906 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.039678256959133e-06, - "loss": 0.4425, - "step": 11907 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.0380768423089874e-06, - "loss": 0.4737, - "step": 11908 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.036475664834358e-06, - "loss": 0.4377, - "step": 11909 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.03487472459894e-06, - "loss": 0.3899, - "step": 11910 - }, - { - "epoch": 2.15, - "grad_norm": 0.0, - "learning_rate": 4.033274021666426e-06, - "loss": 0.4357, - "step": 11911 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.031673556100494e-06, - "loss": 0.397, - "step": 11912 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.030073327964816e-06, - "loss": 0.4087, - "step": 11913 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.028473337323045e-06, - "loss": 0.3785, - "step": 11914 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.02687358423883e-06, - "loss": 0.4954, - "step": 11915 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.025274068775819e-06, - "loss": 0.4287, - "step": 11916 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.0236747909976425e-06, - "loss": 0.4886, - "step": 11917 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.02207575096792e-06, - "loss": 0.4697, - "step": 11918 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.020476948750265e-06, - "loss": 0.4975, - "step": 11919 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.01887838440828e-06, - "loss": 0.4027, - "step": 11920 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.017280058005561e-06, - "loss": 0.4591, - "step": 11921 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.015681969605688e-06, - "loss": 0.4643, - "step": 11922 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.014084119272239e-06, - "loss": 0.4895, - "step": 11923 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.012486507068778e-06, - "loss": 0.4252, - "step": 11924 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.010889133058861e-06, - "loss": 0.4191, - "step": 11925 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.009291997306034e-06, - "loss": 0.374, - "step": 11926 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.00769509987383e-06, - "loss": 0.407, - "step": 11927 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.0060984408257884e-06, - "loss": 0.4509, - "step": 11928 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.004502020225416e-06, - "loss": 0.4323, - "step": 11929 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.002905838136223e-06, - "loss": 0.4814, - "step": 11930 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 4.0013098946217084e-06, - "loss": 0.4295, - "step": 11931 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.999714189745361e-06, - "loss": 0.4041, - "step": 11932 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.998118723570664e-06, - "loss": 0.4761, - "step": 11933 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.996523496161087e-06, - "loss": 0.4065, - "step": 11934 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.99492850758009e-06, - "loss": 0.4521, - "step": 11935 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.993333757891128e-06, - "loss": 0.5217, - "step": 11936 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.991739247157631e-06, - "loss": 0.4311, - "step": 11937 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.9901449754430435e-06, - "loss": 0.5293, - "step": 11938 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.988550942810784e-06, - "loss": 0.4537, - "step": 11939 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.986957149324267e-06, - "loss": 0.4429, - "step": 11940 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.985363595046895e-06, - "loss": 0.4874, - "step": 11941 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.983770280042062e-06, - "loss": 0.3268, - "step": 11942 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.982177204373154e-06, - "loss": 0.3931, - "step": 11943 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.980584368103547e-06, - "loss": 0.424, - "step": 11944 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.978991771296605e-06, - "loss": 0.5023, - "step": 11945 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.9773994140156845e-06, - "loss": 0.4476, - "step": 11946 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.975807296324134e-06, - "loss": 0.3685, - "step": 11947 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.974215418285285e-06, - "loss": 0.4244, - "step": 11948 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.9726237799624715e-06, - "loss": 0.3768, - "step": 11949 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.971032381419011e-06, - "loss": 0.4397, - "step": 11950 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.969441222718213e-06, - "loss": 0.4752, - "step": 11951 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.967850303923372e-06, - "loss": 0.4243, - "step": 11952 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.966259625097775e-06, - "loss": 0.4066, - "step": 11953 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.96466918630471e-06, - "loss": 0.4792, - "step": 11954 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.963078987607445e-06, - "loss": 0.416, - "step": 11955 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.961489029069239e-06, - "loss": 0.5741, - "step": 11956 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.959899310753344e-06, - "loss": 0.4838, - "step": 11957 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.958309832723001e-06, - "loss": 0.378, - "step": 11958 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.956720595041444e-06, - "loss": 0.4959, - "step": 11959 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.955131597771893e-06, - "loss": 0.3857, - "step": 11960 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.953542840977563e-06, - "loss": 0.4671, - "step": 11961 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.951954324721657e-06, - "loss": 0.4273, - "step": 11962 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.95036604906737e-06, - "loss": 0.4479, - "step": 11963 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.948778014077884e-06, - "loss": 0.3982, - "step": 11964 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.9471902198163715e-06, - "loss": 0.4915, - "step": 11965 - }, - { - "epoch": 2.16, - "grad_norm": 0.0, - "learning_rate": 3.945602666346008e-06, - "loss": 0.3986, - "step": 11966 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.94401535372994e-06, - "loss": 0.402, - "step": 11967 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9424282820313144e-06, - "loss": 0.4737, - "step": 11968 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9408414513132685e-06, - "loss": 0.4327, - "step": 11969 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.939254861638927e-06, - "loss": 0.4816, - "step": 11970 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.937668513071413e-06, - "loss": 0.4293, - "step": 11971 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9360824056738316e-06, - "loss": 0.4331, - "step": 11972 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.934496539509278e-06, - "loss": 0.3911, - "step": 11973 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.932910914640849e-06, - "loss": 0.4301, - "step": 11974 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.931325531131609e-06, - "loss": 0.4053, - "step": 11975 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.929740389044638e-06, - "loss": 0.4272, - "step": 11976 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.928155488442995e-06, - "loss": 0.4809, - "step": 11977 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.926570829389727e-06, - "loss": 0.451, - "step": 11978 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.924986411947878e-06, - "loss": 0.3997, - "step": 11979 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.923402236180474e-06, - "loss": 0.4084, - "step": 11980 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.921818302150541e-06, - "loss": 0.4884, - "step": 11981 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.920234609921087e-06, - "loss": 0.4788, - "step": 11982 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9186511595551156e-06, - "loss": 0.4038, - "step": 11983 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.91706795111562e-06, - "loss": 0.3754, - "step": 11984 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9154849846655805e-06, - "loss": 0.4865, - "step": 11985 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.913902260267969e-06, - "loss": 0.4776, - "step": 11986 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.912319777985756e-06, - "loss": 0.4596, - "step": 11987 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.910737537881889e-06, - "loss": 0.4451, - "step": 11988 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.909155540019319e-06, - "loss": 0.4087, - "step": 11989 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.907573784460972e-06, - "loss": 0.5305, - "step": 11990 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.905992271269774e-06, - "loss": 0.3665, - "step": 11991 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.904411000508645e-06, - "loss": 0.4114, - "step": 11992 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.9028299722404905e-06, - "loss": 0.4263, - "step": 11993 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.901249186528203e-06, - "loss": 0.5005, - "step": 11994 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.899668643434672e-06, - "loss": 0.3815, - "step": 11995 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.898088343022772e-06, - "loss": 0.5119, - "step": 11996 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.896508285355371e-06, - "loss": 0.4766, - "step": 11997 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.894928470495327e-06, - "loss": 0.4325, - "step": 11998 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.893348898505485e-06, - "loss": 0.4998, - "step": 11999 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.8917695694486875e-06, - "loss": 0.4151, - "step": 12000 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.89019048338776e-06, - "loss": 0.5061, - "step": 12001 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.888611640385518e-06, - "loss": 0.4872, - "step": 12002 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.887033040504777e-06, - "loss": 0.4492, - "step": 12003 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.8854546838083375e-06, - "loss": 0.4471, - "step": 12004 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.883876570358983e-06, - "loss": 0.5531, - "step": 12005 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.882298700219496e-06, - "loss": 0.4119, - "step": 12006 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.880721073452641e-06, - "loss": 0.4616, - "step": 12007 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.879143690121191e-06, - "loss": 0.505, - "step": 12008 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.877566550287889e-06, - "loss": 0.4357, - "step": 12009 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.8759896540154785e-06, - "loss": 0.5248, - "step": 12010 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.874413001366694e-06, - "loss": 0.5053, - "step": 12011 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.872836592404247e-06, - "loss": 0.3827, - "step": 12012 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.8712604271908605e-06, - "loss": 0.4159, - "step": 12013 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.869684505789233e-06, - "loss": 0.4472, - "step": 12014 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.868108828262058e-06, - "loss": 0.466, - "step": 12015 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.866533394672017e-06, - "loss": 0.454, - "step": 12016 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.864958205081787e-06, - "loss": 0.4707, - "step": 12017 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.863383259554028e-06, - "loss": 0.4279, - "step": 12018 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.861808558151396e-06, - "loss": 0.4164, - "step": 12019 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.860234100936535e-06, - "loss": 0.4423, - "step": 12020 - }, - { - "epoch": 2.17, - "grad_norm": 0.0, - "learning_rate": 3.8586598879720794e-06, - "loss": 0.4207, - "step": 12021 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.857085919320653e-06, - "loss": 0.4148, - "step": 12022 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.855512195044873e-06, - "loss": 0.4055, - "step": 12023 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.853938715207341e-06, - "loss": 0.4774, - "step": 12024 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.852365479870659e-06, - "loss": 0.4868, - "step": 12025 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8507924890974094e-06, - "loss": 0.3961, - "step": 12026 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.849219742950172e-06, - "loss": 0.3977, - "step": 12027 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.847647241491508e-06, - "loss": 0.4253, - "step": 12028 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8460749847839716e-06, - "loss": 0.4729, - "step": 12029 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.844502972890118e-06, - "loss": 0.4972, - "step": 12030 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.842931205872481e-06, - "loss": 0.3931, - "step": 12031 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.841359683793588e-06, - "loss": 0.4543, - "step": 12032 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8397884067159586e-06, - "loss": 0.4275, - "step": 12033 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.838217374702098e-06, - "loss": 0.4002, - "step": 12034 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.836646587814506e-06, - "loss": 0.4321, - "step": 12035 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.835076046115672e-06, - "loss": 0.4692, - "step": 12036 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.833505749668074e-06, - "loss": 0.4517, - "step": 12037 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.831935698534181e-06, - "loss": 0.4296, - "step": 12038 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.830365892776452e-06, - "loss": 0.45, - "step": 12039 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.828796332457333e-06, - "loss": 0.451, - "step": 12040 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.827227017639272e-06, - "loss": 0.4665, - "step": 12041 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.825657948384698e-06, - "loss": 0.4359, - "step": 12042 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.824089124756024e-06, - "loss": 0.387, - "step": 12043 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.822520546815666e-06, - "loss": 0.5047, - "step": 12044 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.820952214626018e-06, - "loss": 0.4487, - "step": 12045 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.819384128249481e-06, - "loss": 0.4759, - "step": 12046 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.817816287748431e-06, - "loss": 0.3943, - "step": 12047 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.81624869318524e-06, - "loss": 0.4341, - "step": 12048 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.814681344622272e-06, - "loss": 0.4129, - "step": 12049 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8131142421218704e-06, - "loss": 0.4481, - "step": 12050 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8115473857463848e-06, - "loss": 0.487, - "step": 12051 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.809980775558146e-06, - "loss": 0.396, - "step": 12052 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8084144116194777e-06, - "loss": 0.4411, - "step": 12053 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.80684829399269e-06, - "loss": 0.4563, - "step": 12054 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8052824227400863e-06, - "loss": 0.4818, - "step": 12055 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8037167979239596e-06, - "loss": 0.4271, - "step": 12056 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.8021514196065944e-06, - "loss": 0.3776, - "step": 12057 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.800586287850262e-06, - "loss": 0.5059, - "step": 12058 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.799021402717228e-06, - "loss": 0.4737, - "step": 12059 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7974567642697456e-06, - "loss": 0.4805, - "step": 12060 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7958923725700593e-06, - "loss": 0.4862, - "step": 12061 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7943282276803985e-06, - "loss": 0.3324, - "step": 12062 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7927643296629947e-06, - "loss": 0.3856, - "step": 12063 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7912006785800638e-06, - "loss": 0.4957, - "step": 12064 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7896372744938016e-06, - "loss": 0.4341, - "step": 12065 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7880741174664083e-06, - "loss": 0.4725, - "step": 12066 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.786511207560064e-06, - "loss": 0.5274, - "step": 12067 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7849485448369527e-06, - "loss": 0.4308, - "step": 12068 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7833861293592343e-06, - "loss": 0.3727, - "step": 12069 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.781823961189066e-06, - "loss": 0.4739, - "step": 12070 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7802620403885937e-06, - "loss": 0.458, - "step": 12071 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7787003670199507e-06, - "loss": 0.4863, - "step": 12072 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7771389411452652e-06, - "loss": 0.4093, - "step": 12073 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7755777628266545e-06, - "loss": 0.4476, - "step": 12074 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.7740168321262217e-06, - "loss": 0.4298, - "step": 12075 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.772456149106065e-06, - "loss": 0.4492, - "step": 12076 - }, - { - "epoch": 2.18, - "grad_norm": 0.0, - "learning_rate": 3.770895713828272e-06, - "loss": 0.4199, - "step": 12077 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.769335526354915e-06, - "loss": 0.4688, - "step": 12078 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.767775586748067e-06, - "loss": 0.4096, - "step": 12079 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.766215895069787e-06, - "loss": 0.4496, - "step": 12080 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7646564513821137e-06, - "loss": 0.42, - "step": 12081 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.763097255747089e-06, - "loss": 0.4861, - "step": 12082 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7615383082267367e-06, - "loss": 0.4825, - "step": 12083 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.75997960888308e-06, - "loss": 0.485, - "step": 12084 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.758421157778125e-06, - "loss": 0.3173, - "step": 12085 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7568629549738677e-06, - "loss": 0.3815, - "step": 12086 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.755305000532302e-06, - "loss": 0.4658, - "step": 12087 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.753747294515393e-06, - "loss": 0.5096, - "step": 12088 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7521898369851216e-06, - "loss": 0.4642, - "step": 12089 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.750632628003441e-06, - "loss": 0.4738, - "step": 12090 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7490756676323005e-06, - "loss": 0.4609, - "step": 12091 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7475189559336378e-06, - "loss": 0.39, - "step": 12092 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7459624929693827e-06, - "loss": 0.4441, - "step": 12093 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7444062788014492e-06, - "loss": 0.4453, - "step": 12094 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7428503134917584e-06, - "loss": 0.4647, - "step": 12095 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7412945971021965e-06, - "loss": 0.4208, - "step": 12096 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7397391296946584e-06, - "loss": 0.3786, - "step": 12097 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7381839113310216e-06, - "loss": 0.3771, - "step": 12098 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7366289420731562e-06, - "loss": 0.4275, - "step": 12099 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7350742219829174e-06, - "loss": 0.4246, - "step": 12100 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.733519751122162e-06, - "loss": 0.5361, - "step": 12101 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.731965529552729e-06, - "loss": 0.4206, - "step": 12102 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7304115573364417e-06, - "loss": 0.422, - "step": 12103 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7288578345351234e-06, - "loss": 0.5017, - "step": 12104 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7273043612105808e-06, - "loss": 0.4345, - "step": 12105 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.725751137424619e-06, - "loss": 0.4551, - "step": 12106 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7241981632390256e-06, - "loss": 0.5543, - "step": 12107 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.72264543871558e-06, - "loss": 0.4235, - "step": 12108 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.721092963916052e-06, - "loss": 0.4547, - "step": 12109 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.719540738902203e-06, - "loss": 0.4255, - "step": 12110 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7179887637357826e-06, - "loss": 0.5085, - "step": 12111 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.716437038478531e-06, - "loss": 0.4621, - "step": 12112 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.714885563192178e-06, - "loss": 0.4144, - "step": 12113 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7133343379384435e-06, - "loss": 0.4364, - "step": 12114 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.71178336277904e-06, - "loss": 0.4024, - "step": 12115 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7102326377756626e-06, - "loss": 0.4274, - "step": 12116 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7086821629900126e-06, - "loss": 0.4866, - "step": 12117 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7071319384837613e-06, - "loss": 0.4429, - "step": 12118 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.705581964318581e-06, - "loss": 0.4095, - "step": 12119 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7040322405561323e-06, - "loss": 0.4396, - "step": 12120 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.7024827672580642e-06, - "loss": 0.4075, - "step": 12121 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.700933544486023e-06, - "loss": 0.4203, - "step": 12122 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6993845723016365e-06, - "loss": 0.5594, - "step": 12123 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6978358507665257e-06, - "loss": 0.4182, - "step": 12124 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.696287379942304e-06, - "loss": 0.4944, - "step": 12125 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6947391598905614e-06, - "loss": 0.4201, - "step": 12126 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6931911906729013e-06, - "loss": 0.4147, - "step": 12127 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6916434723508987e-06, - "loss": 0.4388, - "step": 12128 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6900960049861267e-06, - "loss": 0.459, - "step": 12129 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.6885487886401437e-06, - "loss": 0.5116, - "step": 12130 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.687001823374503e-06, - "loss": 0.4174, - "step": 12131 - }, - { - "epoch": 2.19, - "grad_norm": 0.0, - "learning_rate": 3.685455109250742e-06, - "loss": 0.4478, - "step": 12132 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6839086463304e-06, - "loss": 0.4724, - "step": 12133 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6823624346749874e-06, - "loss": 0.5165, - "step": 12134 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6808164743460217e-06, - "loss": 0.4758, - "step": 12135 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.679270765405002e-06, - "loss": 0.4077, - "step": 12136 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6777253079134145e-06, - "loss": 0.4682, - "step": 12137 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.676180101932749e-06, - "loss": 0.4843, - "step": 12138 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6746351475244723e-06, - "loss": 0.429, - "step": 12139 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6730904447500495e-06, - "loss": 0.4549, - "step": 12140 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.671545993670923e-06, - "loss": 0.4304, - "step": 12141 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6700017943485354e-06, - "loss": 0.4071, - "step": 12142 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.668457846844323e-06, - "loss": 0.4603, - "step": 12143 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.666914151219705e-06, - "loss": 0.4863, - "step": 12144 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.66537070753609e-06, - "loss": 0.4898, - "step": 12145 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6638275158548806e-06, - "loss": 0.5093, - "step": 12146 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6622845762374683e-06, - "loss": 0.4601, - "step": 12147 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6607418887452317e-06, - "loss": 0.4107, - "step": 12148 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6591994534395424e-06, - "loss": 0.5304, - "step": 12149 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6576572703817602e-06, - "loss": 0.4287, - "step": 12150 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6561153396332383e-06, - "loss": 0.384, - "step": 12151 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6545736612553163e-06, - "loss": 0.4207, - "step": 12152 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.653032235309323e-06, - "loss": 0.5055, - "step": 12153 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.651491061856577e-06, - "loss": 0.4122, - "step": 12154 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6499501409583993e-06, - "loss": 0.5118, - "step": 12155 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.64840947267608e-06, - "loss": 0.4285, - "step": 12156 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6468690570709132e-06, - "loss": 0.5352, - "step": 12157 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6453288942041786e-06, - "loss": 0.5052, - "step": 12158 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.643788984137143e-06, - "loss": 0.4479, - "step": 12159 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.642249326931073e-06, - "loss": 0.5358, - "step": 12160 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6407099226472174e-06, - "loss": 0.3749, - "step": 12161 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.639170771346815e-06, - "loss": 0.4315, - "step": 12162 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6376318730910997e-06, - "loss": 0.4924, - "step": 12163 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6360932279412797e-06, - "loss": 0.6115, - "step": 12164 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.634554835958578e-06, - "loss": 0.4719, - "step": 12165 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6330166972041892e-06, - "loss": 0.4207, - "step": 12166 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.631478811739303e-06, - "loss": 0.4873, - "step": 12167 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6299411796250995e-06, - "loss": 0.448, - "step": 12168 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6284038009227485e-06, - "loss": 0.4509, - "step": 12169 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.626866675693409e-06, - "loss": 0.4315, - "step": 12170 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.625329803998232e-06, - "loss": 0.4727, - "step": 12171 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6237931858983556e-06, - "loss": 0.4462, - "step": 12172 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.622256821454908e-06, - "loss": 0.3998, - "step": 12173 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.620720710729011e-06, - "loss": 0.448, - "step": 12174 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6191848537817686e-06, - "loss": 0.3973, - "step": 12175 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.617649250674288e-06, - "loss": 0.4447, - "step": 12176 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6161139014676527e-06, - "loss": 0.4207, - "step": 12177 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.614578806222946e-06, - "loss": 0.4734, - "step": 12178 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6130439650012304e-06, - "loss": 0.4591, - "step": 12179 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6115093778635633e-06, - "loss": 0.4701, - "step": 12180 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6099750448710013e-06, - "loss": 0.4991, - "step": 12181 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6084409660845777e-06, - "loss": 0.4634, - "step": 12182 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.606907141565322e-06, - "loss": 0.4495, - "step": 12183 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6053735713742532e-06, - "loss": 0.5051, - "step": 12184 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6038402555723784e-06, - "loss": 0.4685, - "step": 12185 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.6023071942206943e-06, - "loss": 0.4262, - "step": 12186 - }, - { - "epoch": 2.2, - "grad_norm": 0.0, - "learning_rate": 3.60077438738019e-06, - "loss": 0.4523, - "step": 12187 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.599241835111844e-06, - "loss": 0.4893, - "step": 12188 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5977095374766225e-06, - "loss": 0.5447, - "step": 12189 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.596177494535483e-06, - "loss": 0.4159, - "step": 12190 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5946457063493734e-06, - "loss": 0.4437, - "step": 12191 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5931141729792274e-06, - "loss": 0.4452, - "step": 12192 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5915828944859822e-06, - "loss": 0.4143, - "step": 12193 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.590051870930543e-06, - "loss": 0.4798, - "step": 12194 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5885211023738233e-06, - "loss": 0.4436, - "step": 12195 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5869905888767154e-06, - "loss": 0.4817, - "step": 12196 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5854603305001057e-06, - "loss": 0.4594, - "step": 12197 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5839303273048765e-06, - "loss": 0.4355, - "step": 12198 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.582400579351889e-06, - "loss": 0.5466, - "step": 12199 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5808710867020012e-06, - "loss": 0.5047, - "step": 12200 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5793418494160616e-06, - "loss": 0.502, - "step": 12201 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.577812867554896e-06, - "loss": 0.447, - "step": 12202 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5762841411793392e-06, - "loss": 0.5412, - "step": 12203 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.574755670350204e-06, - "loss": 0.3722, - "step": 12204 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5732274551282943e-06, - "loss": 0.4169, - "step": 12205 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5716994955744078e-06, - "loss": 0.3373, - "step": 12206 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.570171791749326e-06, - "loss": 0.451, - "step": 12207 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5686443437138265e-06, - "loss": 0.4741, - "step": 12208 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.567117151528672e-06, - "loss": 0.444, - "step": 12209 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5655902152546172e-06, - "loss": 0.3795, - "step": 12210 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5640635349524068e-06, - "loss": 0.4494, - "step": 12211 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5625371106827733e-06, - "loss": 0.4146, - "step": 12212 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.561010942506439e-06, - "loss": 0.4224, - "step": 12213 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5594850304841235e-06, - "loss": 0.4898, - "step": 12214 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5579593746765264e-06, - "loss": 0.4427, - "step": 12215 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.556433975144344e-06, - "loss": 0.4145, - "step": 12216 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.554908831948255e-06, - "loss": 0.4191, - "step": 12217 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.553383945148928e-06, - "loss": 0.3285, - "step": 12218 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5518593148070367e-06, - "loss": 0.4824, - "step": 12219 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5503349409832268e-06, - "loss": 0.5237, - "step": 12220 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5488108237381424e-06, - "loss": 0.4952, - "step": 12221 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5472869631324147e-06, - "loss": 0.4348, - "step": 12222 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.545763359226666e-06, - "loss": 0.4552, - "step": 12223 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.544240012081508e-06, - "loss": 0.4688, - "step": 12224 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.542716921757541e-06, - "loss": 0.4189, - "step": 12225 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5411940883153574e-06, - "loss": 0.4936, - "step": 12226 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5396715118155367e-06, - "loss": 0.4765, - "step": 12227 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5381491923186506e-06, - "loss": 0.434, - "step": 12228 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.536627129885256e-06, - "loss": 0.4881, - "step": 12229 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5351053245759105e-06, - "loss": 0.4277, - "step": 12230 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5335837764511526e-06, - "loss": 0.5064, - "step": 12231 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.532062485571507e-06, - "loss": 0.4281, - "step": 12232 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5305414519974957e-06, - "loss": 0.3986, - "step": 12233 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.529020675789625e-06, - "loss": 0.3887, - "step": 12234 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5275001570084e-06, - "loss": 0.5173, - "step": 12235 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5259798957143078e-06, - "loss": 0.395, - "step": 12236 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.524459891967825e-06, - "loss": 0.3922, - "step": 12237 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.522940145829421e-06, - "loss": 0.4595, - "step": 12238 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.521420657359554e-06, - "loss": 0.4729, - "step": 12239 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5199014266186727e-06, - "loss": 0.4795, - "step": 12240 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5183824536672128e-06, - "loss": 0.4399, - "step": 12241 - }, - { - "epoch": 2.21, - "grad_norm": 0.0, - "learning_rate": 3.5168637385656026e-06, - "loss": 0.4678, - "step": 12242 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5153452813742584e-06, - "loss": 0.349, - "step": 12243 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.513827082153588e-06, - "loss": 0.4782, - "step": 12244 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5123091409639886e-06, - "loss": 0.403, - "step": 12245 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5107914578658453e-06, - "loss": 0.4844, - "step": 12246 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.509274032919534e-06, - "loss": 0.4446, - "step": 12247 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.507756866185421e-06, - "loss": 0.4097, - "step": 12248 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5062399577238616e-06, - "loss": 0.4257, - "step": 12249 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.504723307595201e-06, - "loss": 0.4006, - "step": 12250 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5032069158597714e-06, - "loss": 0.5118, - "step": 12251 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.5016907825779036e-06, - "loss": 0.4357, - "step": 12252 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.500174907809909e-06, - "loss": 0.4346, - "step": 12253 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.498659291616093e-06, - "loss": 0.4679, - "step": 12254 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.497143934056747e-06, - "loss": 0.4713, - "step": 12255 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4956288351921496e-06, - "loss": 0.5419, - "step": 12256 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4941139950825855e-06, - "loss": 0.3812, - "step": 12257 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.49259941378831e-06, - "loss": 0.3872, - "step": 12258 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.491085091369578e-06, - "loss": 0.4344, - "step": 12259 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4895710278866314e-06, - "loss": 0.4215, - "step": 12260 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.488057223399702e-06, - "loss": 0.4541, - "step": 12261 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4865436779690132e-06, - "loss": 0.4574, - "step": 12262 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4850303916547746e-06, - "loss": 0.3953, - "step": 12263 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.483517364517187e-06, - "loss": 0.4318, - "step": 12264 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.482004596616442e-06, - "loss": 0.4443, - "step": 12265 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4804920880127204e-06, - "loss": 0.4363, - "step": 12266 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4789798387661887e-06, - "loss": 0.4535, - "step": 12267 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.477467848937014e-06, - "loss": 0.3821, - "step": 12268 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.475956118585345e-06, - "loss": 0.482, - "step": 12269 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4744446477713146e-06, - "loss": 0.4706, - "step": 12270 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4729334365550547e-06, - "loss": 0.5098, - "step": 12271 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4714224849966805e-06, - "loss": 0.4214, - "step": 12272 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.469911793156309e-06, - "loss": 0.3953, - "step": 12273 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4684013610940315e-06, - "loss": 0.4255, - "step": 12274 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4668911888699363e-06, - "loss": 0.4824, - "step": 12275 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4653812765441066e-06, - "loss": 0.3374, - "step": 12276 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.463871624176597e-06, - "loss": 0.4122, - "step": 12277 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.462362231827474e-06, - "loss": 0.4405, - "step": 12278 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4608530995567814e-06, - "loss": 0.4446, - "step": 12279 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4593442274245546e-06, - "loss": 0.4571, - "step": 12280 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4578356154908186e-06, - "loss": 0.4113, - "step": 12281 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.45632726381559e-06, - "loss": 0.4237, - "step": 12282 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.454819172458873e-06, - "loss": 0.4711, - "step": 12283 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4533113414806607e-06, - "loss": 0.4946, - "step": 12284 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4518037709409393e-06, - "loss": 0.4408, - "step": 12285 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4502964608996812e-06, - "loss": 0.436, - "step": 12286 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.448789411416851e-06, - "loss": 0.4346, - "step": 12287 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.447282622552399e-06, - "loss": 0.4157, - "step": 12288 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4457760943662676e-06, - "loss": 0.4675, - "step": 12289 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.444269826918395e-06, - "loss": 0.458, - "step": 12290 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4427638202686987e-06, - "loss": 0.5014, - "step": 12291 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.441258074477094e-06, - "loss": 0.4972, - "step": 12292 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4397525896034758e-06, - "loss": 0.3714, - "step": 12293 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4382473657077343e-06, - "loss": 0.4073, - "step": 12294 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4367424028497565e-06, - "loss": 0.5032, - "step": 12295 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.43523770108941e-06, - "loss": 0.4461, - "step": 12296 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.4337332604865537e-06, - "loss": 0.4308, - "step": 12297 - }, - { - "epoch": 2.22, - "grad_norm": 0.0, - "learning_rate": 3.432229081101036e-06, - "loss": 0.4074, - "step": 12298 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4307251629926985e-06, - "loss": 0.4833, - "step": 12299 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.429221506221366e-06, - "loss": 0.4355, - "step": 12300 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4277181108468594e-06, - "loss": 0.5126, - "step": 12301 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4262149769289855e-06, - "loss": 0.4759, - "step": 12302 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.42471210452754e-06, - "loss": 0.4376, - "step": 12303 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.423209493702313e-06, - "loss": 0.4257, - "step": 12304 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4217071445130744e-06, - "loss": 0.4702, - "step": 12305 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4202050570195977e-06, - "loss": 0.458, - "step": 12306 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4187032312816405e-06, - "loss": 0.4185, - "step": 12307 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.417201667358938e-06, - "loss": 0.4433, - "step": 12308 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.415700365311232e-06, - "loss": 0.4643, - "step": 12309 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4141993251982407e-06, - "loss": 0.4675, - "step": 12310 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4126985470796858e-06, - "loss": 0.3886, - "step": 12311 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.411198031015267e-06, - "loss": 0.5178, - "step": 12312 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4096977770646778e-06, - "loss": 0.44, - "step": 12313 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4081977852876047e-06, - "loss": 0.4872, - "step": 12314 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.406698055743709e-06, - "loss": 0.4623, - "step": 12315 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.405198588492664e-06, - "loss": 0.4488, - "step": 12316 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.403699383594116e-06, - "loss": 0.4325, - "step": 12317 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.4022004411077058e-06, - "loss": 0.4855, - "step": 12318 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.400701761093066e-06, - "loss": 0.5548, - "step": 12319 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3992033436098137e-06, - "loss": 0.4428, - "step": 12320 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3977051887175583e-06, - "loss": 0.5078, - "step": 12321 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3962072964759076e-06, - "loss": 0.4777, - "step": 12322 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.394709666944439e-06, - "loss": 0.4244, - "step": 12323 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3932123001827365e-06, - "loss": 0.4075, - "step": 12324 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3917151962503657e-06, - "loss": 0.544, - "step": 12325 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3902183552068822e-06, - "loss": 0.4962, - "step": 12326 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3887217771118396e-06, - "loss": 0.425, - "step": 12327 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3872254620247704e-06, - "loss": 0.4898, - "step": 12328 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.385729410005203e-06, - "loss": 0.4594, - "step": 12329 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.384233621112648e-06, - "loss": 0.4873, - "step": 12330 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3827380954066093e-06, - "loss": 0.4068, - "step": 12331 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3812428329465884e-06, - "loss": 0.4487, - "step": 12332 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3797478337920662e-06, - "loss": 0.429, - "step": 12333 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3782530980025163e-06, - "loss": 0.4541, - "step": 12334 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3767586256374018e-06, - "loss": 0.4849, - "step": 12335 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3752644167561766e-06, - "loss": 0.4891, - "step": 12336 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3737704714182805e-06, - "loss": 0.5218, - "step": 12337 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3722767896831463e-06, - "loss": 0.4521, - "step": 12338 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3707833716101957e-06, - "loss": 0.4483, - "step": 12339 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3692902172588394e-06, - "loss": 0.4506, - "step": 12340 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.367797326688477e-06, - "loss": 0.3946, - "step": 12341 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.366304699958498e-06, - "loss": 0.453, - "step": 12342 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3648123371282795e-06, - "loss": 0.4345, - "step": 12343 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.363320238257196e-06, - "loss": 0.4006, - "step": 12344 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3618284034046056e-06, - "loss": 0.5217, - "step": 12345 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3603368326298503e-06, - "loss": 0.398, - "step": 12346 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3588455259922705e-06, - "loss": 0.4439, - "step": 12347 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3573544835511885e-06, - "loss": 0.5411, - "step": 12348 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3558637053659283e-06, - "loss": 0.3337, - "step": 12349 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3543731914957924e-06, - "loss": 0.4267, - "step": 12350 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3528829420000752e-06, - "loss": 0.4531, - "step": 12351 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3513929569380653e-06, - "loss": 0.4187, - "step": 12352 - }, - { - "epoch": 2.23, - "grad_norm": 0.0, - "learning_rate": 3.3499032363690266e-06, - "loss": 0.4648, - "step": 12353 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.348413780352232e-06, - "loss": 0.407, - "step": 12354 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3469245889469328e-06, - "loss": 0.4479, - "step": 12355 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3454356622123695e-06, - "loss": 0.4043, - "step": 12356 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.343947000207777e-06, - "loss": 0.4454, - "step": 12357 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3424586029923746e-06, - "loss": 0.4184, - "step": 12358 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.340970470625371e-06, - "loss": 0.4603, - "step": 12359 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3394826031659753e-06, - "loss": 0.4273, - "step": 12360 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3379950006733697e-06, - "loss": 0.4394, - "step": 12361 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.336507663206735e-06, - "loss": 0.4511, - "step": 12362 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3350205908252407e-06, - "loss": 0.414, - "step": 12363 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3335337835880422e-06, - "loss": 0.4657, - "step": 12364 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3320472415542926e-06, - "loss": 0.4997, - "step": 12365 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.330560964783127e-06, - "loss": 0.5445, - "step": 12366 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3290749533336743e-06, - "loss": 0.3854, - "step": 12367 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.327589207265045e-06, - "loss": 0.4366, - "step": 12368 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.326103726636345e-06, - "loss": 0.4179, - "step": 12369 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.324618511506674e-06, - "loss": 0.408, - "step": 12370 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3231335619351147e-06, - "loss": 0.3656, - "step": 12371 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.32164887798074e-06, - "loss": 0.4701, - "step": 12372 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3201644597026138e-06, - "loss": 0.4533, - "step": 12373 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3186803071597884e-06, - "loss": 0.4289, - "step": 12374 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.317196420411306e-06, - "loss": 0.4162, - "step": 12375 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3157127995162e-06, - "loss": 0.4052, - "step": 12376 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3142294445334877e-06, - "loss": 0.4226, - "step": 12377 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3127463555221816e-06, - "loss": 0.4453, - "step": 12378 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3112635325412822e-06, - "loss": 0.4486, - "step": 12379 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.309780975649778e-06, - "loss": 0.4338, - "step": 12380 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3082986849066434e-06, - "loss": 0.481, - "step": 12381 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3068166603708574e-06, - "loss": 0.3947, - "step": 12382 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.305334902101368e-06, - "loss": 0.4965, - "step": 12383 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.303853410157124e-06, - "loss": 0.432, - "step": 12384 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.3023721845970623e-06, - "loss": 0.4212, - "step": 12385 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.300891225480106e-06, - "loss": 0.3757, - "step": 12386 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.299410532865177e-06, - "loss": 0.4819, - "step": 12387 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2979301068111746e-06, - "loss": 0.4082, - "step": 12388 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2964499473769952e-06, - "loss": 0.3677, - "step": 12389 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2949700546215225e-06, - "loss": 0.3843, - "step": 12390 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.293490428603622e-06, - "loss": 0.4891, - "step": 12391 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2920110693821637e-06, - "loss": 0.5411, - "step": 12392 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.290531977015996e-06, - "loss": 0.4014, - "step": 12393 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2890531515639613e-06, - "loss": 0.4499, - "step": 12394 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.287574593084888e-06, - "loss": 0.4433, - "step": 12395 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.286096301637597e-06, - "loss": 0.4789, - "step": 12396 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.284618277280893e-06, - "loss": 0.4736, - "step": 12397 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2831405200735843e-06, - "loss": 0.463, - "step": 12398 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.28166303007445e-06, - "loss": 0.4903, - "step": 12399 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.280185807342269e-06, - "loss": 0.3849, - "step": 12400 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.278708851935808e-06, - "loss": 0.4313, - "step": 12401 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2772321639138194e-06, - "loss": 0.5292, - "step": 12402 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2757557433350563e-06, - "loss": 0.4571, - "step": 12403 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.274279590258248e-06, - "loss": 0.4264, - "step": 12404 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2728037047421234e-06, - "loss": 0.4951, - "step": 12405 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.271328086845389e-06, - "loss": 0.4792, - "step": 12406 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2698527366267462e-06, - "loss": 0.4511, - "step": 12407 - }, - { - "epoch": 2.24, - "grad_norm": 0.0, - "learning_rate": 3.2683776541448943e-06, - "loss": 0.3958, - "step": 12408 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.266902839458511e-06, - "loss": 0.4268, - "step": 12409 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.265428292626267e-06, - "loss": 0.4434, - "step": 12410 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2639540137068227e-06, - "loss": 0.3785, - "step": 12411 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.262480002758828e-06, - "loss": 0.4132, - "step": 12412 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2610062598409196e-06, - "loss": 0.3995, - "step": 12413 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2595327850117277e-06, - "loss": 0.5004, - "step": 12414 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2580595783298676e-06, - "loss": 0.3684, - "step": 12415 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.256586639853948e-06, - "loss": 0.4563, - "step": 12416 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2551139696425636e-06, - "loss": 0.4915, - "step": 12417 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2536415677542966e-06, - "loss": 0.4549, - "step": 12418 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2521694342477294e-06, - "loss": 0.5153, - "step": 12419 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2506975691814248e-06, - "loss": 0.4537, - "step": 12420 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2492259726139286e-06, - "loss": 0.4183, - "step": 12421 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2477546446037887e-06, - "loss": 0.4174, - "step": 12422 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2462835852095333e-06, - "loss": 0.5164, - "step": 12423 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.24481279448969e-06, - "loss": 0.4497, - "step": 12424 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2433422725027653e-06, - "loss": 0.393, - "step": 12425 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2418720193072604e-06, - "loss": 0.4672, - "step": 12426 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.240402034961664e-06, - "loss": 0.5066, - "step": 12427 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2389323195244536e-06, - "loss": 0.4579, - "step": 12428 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.237462873054099e-06, - "loss": 0.4205, - "step": 12429 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2359936956090554e-06, - "loss": 0.4477, - "step": 12430 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.23452478724777e-06, - "loss": 0.5257, - "step": 12431 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.233056148028678e-06, - "loss": 0.4166, - "step": 12432 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2315877780102068e-06, - "loss": 0.3883, - "step": 12433 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.230119677250767e-06, - "loss": 0.4682, - "step": 12434 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2286518458087613e-06, - "loss": 0.4301, - "step": 12435 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2271842837425917e-06, - "loss": 0.4485, - "step": 12436 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.22571699111063e-06, - "loss": 0.4168, - "step": 12437 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2242499679712513e-06, - "loss": 0.437, - "step": 12438 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2227832143828163e-06, - "loss": 0.4582, - "step": 12439 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.221316730403673e-06, - "loss": 0.4184, - "step": 12440 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2198505160921643e-06, - "loss": 0.4545, - "step": 12441 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2183845715066185e-06, - "loss": 0.5406, - "step": 12442 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2169188967053544e-06, - "loss": 0.4169, - "step": 12443 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.215453491746674e-06, - "loss": 0.4076, - "step": 12444 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2139883566888717e-06, - "loss": 0.4879, - "step": 12445 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2125234915902426e-06, - "loss": 0.4488, - "step": 12446 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2110588965090562e-06, - "loss": 0.5369, - "step": 12447 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.209594571503576e-06, - "loss": 0.372, - "step": 12448 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2081305166320577e-06, - "loss": 0.3761, - "step": 12449 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.206666731952742e-06, - "loss": 0.3529, - "step": 12450 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.205203217523861e-06, - "loss": 0.3843, - "step": 12451 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2037399734036366e-06, - "loss": 0.5006, - "step": 12452 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2022769996502777e-06, - "loss": 0.3907, - "step": 12453 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.2008142963219857e-06, - "loss": 0.4242, - "step": 12454 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1993518634769492e-06, - "loss": 0.4542, - "step": 12455 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1978897011733425e-06, - "loss": 0.4389, - "step": 12456 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.196427809469338e-06, - "loss": 0.4206, - "step": 12457 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1949661884230943e-06, - "loss": 0.4924, - "step": 12458 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.19350483809275e-06, - "loss": 0.4529, - "step": 12459 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.192043758536444e-06, - "loss": 0.5617, - "step": 12460 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1905829498122964e-06, - "loss": 0.4353, - "step": 12461 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1891224119784283e-06, - "loss": 0.4554, - "step": 12462 - }, - { - "epoch": 2.25, - "grad_norm": 0.0, - "learning_rate": 3.1876621450929367e-06, - "loss": 0.4299, - "step": 12463 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.186202149213916e-06, - "loss": 0.3994, - "step": 12464 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1847424243994463e-06, - "loss": 0.4466, - "step": 12465 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1832829707075985e-06, - "loss": 0.4182, - "step": 12466 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.181823788196431e-06, - "loss": 0.4024, - "step": 12467 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1803648769239937e-06, - "loss": 0.4618, - "step": 12468 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.178906236948325e-06, - "loss": 0.4631, - "step": 12469 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1774478683274514e-06, - "loss": 0.48, - "step": 12470 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1759897711193876e-06, - "loss": 0.5039, - "step": 12471 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1745319453821423e-06, - "loss": 0.3792, - "step": 12472 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1730743911737092e-06, - "loss": 0.4434, - "step": 12473 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1716171085520707e-06, - "loss": 0.4143, - "step": 12474 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1701600975752024e-06, - "loss": 0.508, - "step": 12475 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.168703358301065e-06, - "loss": 0.4494, - "step": 12476 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.167246890787611e-06, - "loss": 0.4017, - "step": 12477 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1657906950927773e-06, - "loss": 0.4682, - "step": 12478 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.164334771274501e-06, - "loss": 0.3303, - "step": 12479 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1628791193906973e-06, - "loss": 0.4416, - "step": 12480 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1614237394992787e-06, - "loss": 0.4679, - "step": 12481 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1599686316581345e-06, - "loss": 0.4756, - "step": 12482 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.158513795925153e-06, - "loss": 0.4879, - "step": 12483 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.157059232358217e-06, - "loss": 0.3704, - "step": 12484 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.155604941015187e-06, - "loss": 0.4767, - "step": 12485 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1541509219539166e-06, - "loss": 0.4877, - "step": 12486 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.152697175232251e-06, - "loss": 0.4643, - "step": 12487 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.151243700908021e-06, - "loss": 0.4765, - "step": 12488 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1497904990390506e-06, - "loss": 0.4272, - "step": 12489 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1483375696831475e-06, - "loss": 0.496, - "step": 12490 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.146884912898114e-06, - "loss": 0.4312, - "step": 12491 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1454325287417385e-06, - "loss": 0.3798, - "step": 12492 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1439804172718e-06, - "loss": 0.4094, - "step": 12493 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1425285785460623e-06, - "loss": 0.4324, - "step": 12494 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1410770126222888e-06, - "loss": 0.461, - "step": 12495 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1396257195582246e-06, - "loss": 0.4514, - "step": 12496 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.138174699411598e-06, - "loss": 0.4369, - "step": 12497 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1367239522401373e-06, - "loss": 0.4997, - "step": 12498 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1352734781015525e-06, - "loss": 0.4148, - "step": 12499 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1338232770535516e-06, - "loss": 0.4028, - "step": 12500 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.132373349153822e-06, - "loss": 0.4594, - "step": 12501 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1309236944600475e-06, - "loss": 0.472, - "step": 12502 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1294743130298945e-06, - "loss": 0.3943, - "step": 12503 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1280252049210224e-06, - "loss": 0.4634, - "step": 12504 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.126576370191081e-06, - "loss": 0.3587, - "step": 12505 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1251278088977067e-06, - "loss": 0.4725, - "step": 12506 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1236795210985247e-06, - "loss": 0.4029, - "step": 12507 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.122231506851152e-06, - "loss": 0.4479, - "step": 12508 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1207837662131923e-06, - "loss": 0.3768, - "step": 12509 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.119336299242235e-06, - "loss": 0.3893, - "step": 12510 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1178891059958736e-06, - "loss": 0.4533, - "step": 12511 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1164421865316706e-06, - "loss": 0.4148, - "step": 12512 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.11499554090719e-06, - "loss": 0.4402, - "step": 12513 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.11354916917998e-06, - "loss": 0.4421, - "step": 12514 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1121030714075796e-06, - "loss": 0.4655, - "step": 12515 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1106572476475204e-06, - "loss": 0.4141, - "step": 12516 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1092116979573185e-06, - "loss": 0.489, - "step": 12517 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.1077664223944803e-06, - "loss": 0.382, - "step": 12518 - }, - { - "epoch": 2.26, - "grad_norm": 0.0, - "learning_rate": 3.106321421016504e-06, - "loss": 0.4685, - "step": 12519 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.104876693880867e-06, - "loss": 0.4755, - "step": 12520 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.1034322410450445e-06, - "loss": 0.395, - "step": 12521 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.1019880625665056e-06, - "loss": 0.4937, - "step": 12522 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.100544158502697e-06, - "loss": 0.4437, - "step": 12523 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.099100528911063e-06, - "loss": 0.4543, - "step": 12524 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0976571738490315e-06, - "loss": 0.4795, - "step": 12525 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0962140933740212e-06, - "loss": 0.3866, - "step": 12526 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0947712875434412e-06, - "loss": 0.5031, - "step": 12527 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0933287564146885e-06, - "loss": 0.3902, - "step": 12528 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0918865000451504e-06, - "loss": 0.3922, - "step": 12529 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0904445184922015e-06, - "loss": 0.4543, - "step": 12530 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0890028118132064e-06, - "loss": 0.4411, - "step": 12531 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0875613800655146e-06, - "loss": 0.3732, - "step": 12532 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.086120223306477e-06, - "loss": 0.5196, - "step": 12533 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.084679341593424e-06, - "loss": 0.4742, - "step": 12534 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.08323873498367e-06, - "loss": 0.3802, - "step": 12535 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0817984035345283e-06, - "loss": 0.4488, - "step": 12536 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0803583473032937e-06, - "loss": 0.3973, - "step": 12537 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.078918566347262e-06, - "loss": 0.4041, - "step": 12538 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.077479060723707e-06, - "loss": 0.4016, - "step": 12539 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.076039830489893e-06, - "loss": 0.3764, - "step": 12540 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0746008757030755e-06, - "loss": 0.5474, - "step": 12541 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0731621964204995e-06, - "loss": 0.4051, - "step": 12542 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.071723792699398e-06, - "loss": 0.3954, - "step": 12543 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0702856645969924e-06, - "loss": 0.4595, - "step": 12544 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0688478121704946e-06, - "loss": 0.457, - "step": 12545 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.067410235477104e-06, - "loss": 0.4192, - "step": 12546 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0659729345740107e-06, - "loss": 0.4051, - "step": 12547 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0645359095183903e-06, - "loss": 0.4002, - "step": 12548 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0630991603674178e-06, - "loss": 0.39, - "step": 12549 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0616626871782417e-06, - "loss": 0.4582, - "step": 12550 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0602264900080092e-06, - "loss": 0.4183, - "step": 12551 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0587905689138566e-06, - "loss": 0.3812, - "step": 12552 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.057354923952902e-06, - "loss": 0.4332, - "step": 12553 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.055919555182265e-06, - "loss": 0.5313, - "step": 12554 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.054484462659044e-06, - "loss": 0.4517, - "step": 12555 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.05304964644033e-06, - "loss": 0.436, - "step": 12556 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0516151065832056e-06, - "loss": 0.4257, - "step": 12557 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0501808431447275e-06, - "loss": 0.4028, - "step": 12558 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0487468561819654e-06, - "loss": 0.4328, - "step": 12559 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0473131457519613e-06, - "loss": 0.4287, - "step": 12560 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.045879711911751e-06, - "loss": 0.4155, - "step": 12561 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0444465547183586e-06, - "loss": 0.4275, - "step": 12562 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0430136742287973e-06, - "loss": 0.4928, - "step": 12563 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0415810705000713e-06, - "loss": 0.4135, - "step": 12564 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0401487435891697e-06, - "loss": 0.4111, - "step": 12565 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0387166935530745e-06, - "loss": 0.4128, - "step": 12566 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0372849204487543e-06, - "loss": 0.3961, - "step": 12567 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.035853424333168e-06, - "loss": 0.4234, - "step": 12568 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.034422205263262e-06, - "loss": 0.4575, - "step": 12569 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0329912632959713e-06, - "loss": 0.4776, - "step": 12570 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.031560598488226e-06, - "loss": 0.3842, - "step": 12571 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.030130210896941e-06, - "loss": 0.5083, - "step": 12572 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.028700100579013e-06, - "loss": 0.459, - "step": 12573 - }, - { - "epoch": 2.27, - "grad_norm": 0.0, - "learning_rate": 3.0272702675913366e-06, - "loss": 0.4645, - "step": 12574 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0258407119907917e-06, - "loss": 0.4058, - "step": 12575 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0244114338342534e-06, - "loss": 0.4179, - "step": 12576 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0229824331785785e-06, - "loss": 0.4526, - "step": 12577 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0215537100806147e-06, - "loss": 0.477, - "step": 12578 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0201252645972025e-06, - "loss": 0.4998, - "step": 12579 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.018697096785157e-06, - "loss": 0.4171, - "step": 12580 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0172692067013054e-06, - "loss": 0.4185, - "step": 12581 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0158415944024456e-06, - "loss": 0.4191, - "step": 12582 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.014414259945372e-06, - "loss": 0.4166, - "step": 12583 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.012987203386866e-06, - "loss": 0.4417, - "step": 12584 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0115604247836983e-06, - "loss": 0.532, - "step": 12585 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0101339241926263e-06, - "loss": 0.3854, - "step": 12586 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.008707701670407e-06, - "loss": 0.4437, - "step": 12587 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.007281757273769e-06, - "loss": 0.5075, - "step": 12588 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0058560910594413e-06, - "loss": 0.4099, - "step": 12589 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0044307030841414e-06, - "loss": 0.3743, - "step": 12590 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0030055934045677e-06, - "loss": 0.4553, - "step": 12591 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.001580762077422e-06, - "loss": 0.4049, - "step": 12592 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 3.0001562091593815e-06, - "loss": 0.4714, - "step": 12593 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.99873193470712e-06, - "loss": 0.5734, - "step": 12594 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9973079387772974e-06, - "loss": 0.4376, - "step": 12595 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.995884221426556e-06, - "loss": 0.4638, - "step": 12596 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9944607827115413e-06, - "loss": 0.4223, - "step": 12597 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.993037622688879e-06, - "loss": 0.446, - "step": 12598 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9916147414151832e-06, - "loss": 0.4493, - "step": 12599 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.990192138947059e-06, - "loss": 0.4602, - "step": 12600 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9887698153410993e-06, - "loss": 0.4106, - "step": 12601 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.987347770653888e-06, - "loss": 0.4686, - "step": 12602 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.985926004941996e-06, - "loss": 0.4057, - "step": 12603 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9845045182619827e-06, - "loss": 0.5086, - "step": 12604 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.983083310670397e-06, - "loss": 0.4135, - "step": 12605 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.981662382223779e-06, - "loss": 0.3933, - "step": 12606 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9802417329786547e-06, - "loss": 0.5051, - "step": 12607 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9788213629915363e-06, - "loss": 0.4743, - "step": 12608 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.977401272318935e-06, - "loss": 0.4681, - "step": 12609 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9759814610173442e-06, - "loss": 0.4221, - "step": 12610 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9745619291432415e-06, - "loss": 0.4833, - "step": 12611 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9731426767531e-06, - "loss": 0.4768, - "step": 12612 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9717237039033775e-06, - "loss": 0.5028, - "step": 12613 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9703050106505293e-06, - "loss": 0.4489, - "step": 12614 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9688865970509906e-06, - "loss": 0.5308, - "step": 12615 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.967468463161187e-06, - "loss": 0.4879, - "step": 12616 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.96605060903754e-06, - "loss": 0.5318, - "step": 12617 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9646330347364415e-06, - "loss": 0.5071, - "step": 12618 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.963215740314298e-06, - "loss": 0.4582, - "step": 12619 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.961798725827486e-06, - "loss": 0.4224, - "step": 12620 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9603819913323783e-06, - "loss": 0.4456, - "step": 12621 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9589655368853344e-06, - "loss": 0.4353, - "step": 12622 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.957549362542703e-06, - "loss": 0.5175, - "step": 12623 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.95613346836082e-06, - "loss": 0.5239, - "step": 12624 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9547178543960197e-06, - "loss": 0.5041, - "step": 12625 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9533025207046094e-06, - "loss": 0.44, - "step": 12626 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9518874673428976e-06, - "loss": 0.4489, - "step": 12627 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.950472694367176e-06, - "loss": 0.4249, - "step": 12628 - }, - { - "epoch": 2.28, - "grad_norm": 0.0, - "learning_rate": 2.9490582018337233e-06, - "loss": 0.5179, - "step": 12629 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9476439897988175e-06, - "loss": 0.4367, - "step": 12630 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9462300583187155e-06, - "loss": 0.4557, - "step": 12631 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9448164074496687e-06, - "loss": 0.4527, - "step": 12632 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9434030372479083e-06, - "loss": 0.4761, - "step": 12633 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.94198994776966e-06, - "loss": 0.4419, - "step": 12634 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.940577139071147e-06, - "loss": 0.4663, - "step": 12635 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.939164611208568e-06, - "loss": 0.5272, - "step": 12636 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9377523642381167e-06, - "loss": 0.375, - "step": 12637 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9363403982159743e-06, - "loss": 0.4772, - "step": 12638 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.934928713198312e-06, - "loss": 0.4408, - "step": 12639 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9335173092412896e-06, - "loss": 0.4483, - "step": 12640 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9321061864010524e-06, - "loss": 0.4563, - "step": 12641 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9306953447337404e-06, - "loss": 0.3769, - "step": 12642 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9292847842954776e-06, - "loss": 0.4812, - "step": 12643 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9278745051423783e-06, - "loss": 0.5008, - "step": 12644 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.926464507330543e-06, - "loss": 0.4388, - "step": 12645 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.925054790916072e-06, - "loss": 0.4271, - "step": 12646 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9236453559550404e-06, - "loss": 0.3913, - "step": 12647 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.922236202503522e-06, - "loss": 0.4815, - "step": 12648 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9208273306175695e-06, - "loss": 0.4857, - "step": 12649 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9194187403532303e-06, - "loss": 0.4889, - "step": 12650 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.918010431766546e-06, - "loss": 0.4577, - "step": 12651 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.916602404913539e-06, - "loss": 0.4511, - "step": 12652 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9151946598502223e-06, - "loss": 0.4261, - "step": 12653 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9137871966326003e-06, - "loss": 0.4248, - "step": 12654 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9123800153166615e-06, - "loss": 0.5139, - "step": 12655 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9109731159583887e-06, - "loss": 0.4725, - "step": 12656 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9095664986137485e-06, - "loss": 0.432, - "step": 12657 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9081601633387e-06, - "loss": 0.4114, - "step": 12658 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9067541101891895e-06, - "loss": 0.4025, - "step": 12659 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.905348339221151e-06, - "loss": 0.4892, - "step": 12660 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.903942850490511e-06, - "loss": 0.443, - "step": 12661 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9025376440531762e-06, - "loss": 0.4227, - "step": 12662 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.9011327199650584e-06, - "loss": 0.4862, - "step": 12663 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8997280782820403e-06, - "loss": 0.4734, - "step": 12664 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.898323719060002e-06, - "loss": 0.3814, - "step": 12665 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.896919642354812e-06, - "loss": 0.3961, - "step": 12666 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8955158482223232e-06, - "loss": 0.4629, - "step": 12667 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8941123367183888e-06, - "loss": 0.3992, - "step": 12668 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.892709107898838e-06, - "loss": 0.3706, - "step": 12669 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.891306161819497e-06, - "loss": 0.385, - "step": 12670 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.889903498536173e-06, - "loss": 0.463, - "step": 12671 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.888501118104664e-06, - "loss": 0.493, - "step": 12672 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.887099020580767e-06, - "loss": 0.4871, - "step": 12673 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.885697206020256e-06, - "loss": 0.4308, - "step": 12674 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.884295674478896e-06, - "loss": 0.3934, - "step": 12675 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8828944260124457e-06, - "loss": 0.4862, - "step": 12676 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8814934606766466e-06, - "loss": 0.4696, - "step": 12677 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8800927785272327e-06, - "loss": 0.4065, - "step": 12678 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8786923796199253e-06, - "loss": 0.4966, - "step": 12679 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.877292264010434e-06, - "loss": 0.4248, - "step": 12680 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.875892431754459e-06, - "loss": 0.4339, - "step": 12681 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.874492882907687e-06, - "loss": 0.4872, - "step": 12682 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8730936175257916e-06, - "loss": 0.3698, - "step": 12683 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8716946356644437e-06, - "loss": 0.3246, - "step": 12684 - }, - { - "epoch": 2.29, - "grad_norm": 0.0, - "learning_rate": 2.8702959373792995e-06, - "loss": 0.4143, - "step": 12685 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.868897522725992e-06, - "loss": 0.4623, - "step": 12686 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.867499391760158e-06, - "loss": 0.5482, - "step": 12687 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.866101544537413e-06, - "loss": 0.4997, - "step": 12688 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8647039811133735e-06, - "loss": 0.4241, - "step": 12689 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.863306701543633e-06, - "loss": 0.3778, - "step": 12690 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8619097058837777e-06, - "loss": 0.4361, - "step": 12691 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8605129941893816e-06, - "loss": 0.441, - "step": 12692 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8591165665160104e-06, - "loss": 0.3813, - "step": 12693 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8577204229192145e-06, - "loss": 0.4091, - "step": 12694 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.856324563454537e-06, - "loss": 0.4455, - "step": 12695 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8549289881775043e-06, - "loss": 0.458, - "step": 12696 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8535336971436378e-06, - "loss": 0.4107, - "step": 12697 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8521386904084424e-06, - "loss": 0.5311, - "step": 12698 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.850743968027415e-06, - "loss": 0.427, - "step": 12699 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8493495300560383e-06, - "loss": 0.3936, - "step": 12700 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8479553765497913e-06, - "loss": 0.3953, - "step": 12701 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8465615075641297e-06, - "loss": 0.4069, - "step": 12702 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.845167923154506e-06, - "loss": 0.4286, - "step": 12703 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8437746233763585e-06, - "loss": 0.4077, - "step": 12704 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.842381608285113e-06, - "loss": 0.3281, - "step": 12705 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8409888779361914e-06, - "loss": 0.4143, - "step": 12706 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.839596432384997e-06, - "loss": 0.4373, - "step": 12707 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8382042716869252e-06, - "loss": 0.4261, - "step": 12708 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8368123958973538e-06, - "loss": 0.4959, - "step": 12709 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8354208050716535e-06, - "loss": 0.3974, - "step": 12710 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8340294992651906e-06, - "loss": 0.4488, - "step": 12711 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8326384785333095e-06, - "loss": 0.4707, - "step": 12712 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.831247742931349e-06, - "loss": 0.4735, - "step": 12713 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.829857292514633e-06, - "loss": 0.4446, - "step": 12714 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8284671273384777e-06, - "loss": 0.4993, - "step": 12715 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8270772474581853e-06, - "loss": 0.4456, - "step": 12716 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8256876529290478e-06, - "loss": 0.4235, - "step": 12717 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8242983438063455e-06, - "loss": 0.504, - "step": 12718 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8229093201453484e-06, - "loss": 0.4904, - "step": 12719 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8215205820013137e-06, - "loss": 0.4186, - "step": 12720 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8201321294294837e-06, - "loss": 0.4594, - "step": 12721 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.818743962485101e-06, - "loss": 0.2893, - "step": 12722 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.817356081223389e-06, - "loss": 0.427, - "step": 12723 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.815968485699553e-06, - "loss": 0.5082, - "step": 12724 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.814581175968798e-06, - "loss": 0.4151, - "step": 12725 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8131941520863092e-06, - "loss": 0.4591, - "step": 12726 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.811807414107273e-06, - "loss": 0.528, - "step": 12727 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8104209620868516e-06, - "loss": 0.4044, - "step": 12728 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8090347960802e-06, - "loss": 0.431, - "step": 12729 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8076489161424638e-06, - "loss": 0.431, - "step": 12730 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.806263322328775e-06, - "loss": 0.3994, - "step": 12731 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8048780146942546e-06, - "loss": 0.4958, - "step": 12732 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8034929932940126e-06, - "loss": 0.4862, - "step": 12733 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.802108258183147e-06, - "loss": 0.4852, - "step": 12734 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.8007238094167466e-06, - "loss": 0.497, - "step": 12735 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.7993396470498847e-06, - "loss": 0.4074, - "step": 12736 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.797955771137625e-06, - "loss": 0.4654, - "step": 12737 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.7965721817350288e-06, - "loss": 0.5378, - "step": 12738 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.795188878897128e-06, - "loss": 0.4925, - "step": 12739 - }, - { - "epoch": 2.3, - "grad_norm": 0.0, - "learning_rate": 2.7938058626789568e-06, - "loss": 0.4122, - "step": 12740 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.792423133135532e-06, - "loss": 0.4211, - "step": 12741 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7910406903218603e-06, - "loss": 0.531, - "step": 12742 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7896585342929416e-06, - "loss": 0.3979, - "step": 12743 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7882766651037594e-06, - "loss": 0.4472, - "step": 12744 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7868950828092855e-06, - "loss": 0.4234, - "step": 12745 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7855137874644845e-06, - "loss": 0.3953, - "step": 12746 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.784132779124299e-06, - "loss": 0.3927, - "step": 12747 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.782752057843675e-06, - "loss": 0.382, - "step": 12748 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.781371623677539e-06, - "loss": 0.4665, - "step": 12749 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7799914766808045e-06, - "loss": 0.4486, - "step": 12750 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7786116169083786e-06, - "loss": 0.5312, - "step": 12751 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7772320444151533e-06, - "loss": 0.4386, - "step": 12752 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7758527592560113e-06, - "loss": 0.4944, - "step": 12753 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.774473761485822e-06, - "loss": 0.3456, - "step": 12754 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7730950511594435e-06, - "loss": 0.4318, - "step": 12755 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7717166283317255e-06, - "loss": 0.3777, - "step": 12756 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.770338493057502e-06, - "loss": 0.5164, - "step": 12757 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7689606453915985e-06, - "loss": 0.3933, - "step": 12758 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.767583085388824e-06, - "loss": 0.4563, - "step": 12759 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.766205813103989e-06, - "loss": 0.3795, - "step": 12760 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7648288285918814e-06, - "loss": 0.4651, - "step": 12761 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.763452131907274e-06, - "loss": 0.3762, - "step": 12762 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.762075723104938e-06, - "loss": 0.5349, - "step": 12763 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.760699602239625e-06, - "loss": 0.4002, - "step": 12764 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7593237693660878e-06, - "loss": 0.4017, - "step": 12765 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7579482245390542e-06, - "loss": 0.4388, - "step": 12766 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.756572967813247e-06, - "loss": 0.4413, - "step": 12767 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7551979992433754e-06, - "loss": 0.447, - "step": 12768 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.753823318884139e-06, - "loss": 0.4292, - "step": 12769 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7524489267902245e-06, - "loss": 0.3734, - "step": 12770 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7510748230163063e-06, - "loss": 0.4405, - "step": 12771 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7497010076170504e-06, - "loss": 0.4714, - "step": 12772 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7483274806471096e-06, - "loss": 0.4379, - "step": 12773 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.746954242161124e-06, - "loss": 0.4123, - "step": 12774 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7455812922137203e-06, - "loss": 0.4454, - "step": 12775 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7442086308595264e-06, - "loss": 0.4567, - "step": 12776 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.74283625815314e-06, - "loss": 0.4779, - "step": 12777 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7414641741491597e-06, - "loss": 0.4093, - "step": 12778 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.740092378902169e-06, - "loss": 0.4085, - "step": 12779 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.738720872466737e-06, - "loss": 0.4556, - "step": 12780 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.737349654897432e-06, - "loss": 0.4692, - "step": 12781 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.735978726248798e-06, - "loss": 0.4211, - "step": 12782 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7346080865753754e-06, - "loss": 0.4061, - "step": 12783 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.733237735931692e-06, - "loss": 0.4981, - "step": 12784 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.731867674372254e-06, - "loss": 0.547, - "step": 12785 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7304979019515744e-06, - "loss": 0.464, - "step": 12786 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7291284187241405e-06, - "loss": 0.4023, - "step": 12787 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.727759224744435e-06, - "loss": 0.379, - "step": 12788 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7263903200669252e-06, - "loss": 0.4395, - "step": 12789 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.725021704746068e-06, - "loss": 0.4271, - "step": 12790 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7236533788363116e-06, - "loss": 0.5432, - "step": 12791 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.722285342392088e-06, - "loss": 0.4137, - "step": 12792 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7209175954678214e-06, - "loss": 0.4189, - "step": 12793 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.7195501381179213e-06, - "loss": 0.3578, - "step": 12794 - }, - { - "epoch": 2.31, - "grad_norm": 0.0, - "learning_rate": 2.71818297039679e-06, - "loss": 0.5038, - "step": 12795 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7168160923588136e-06, - "loss": 0.4122, - "step": 12796 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7154495040583685e-06, - "loss": 0.4127, - "step": 12797 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7140832055498234e-06, - "loss": 0.3749, - "step": 12798 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.712717196887532e-06, - "loss": 0.4532, - "step": 12799 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.711351478125832e-06, - "loss": 0.4925, - "step": 12800 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7099860493190566e-06, - "loss": 0.4219, - "step": 12801 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.708620910521522e-06, - "loss": 0.4996, - "step": 12802 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.707256061787541e-06, - "loss": 0.465, - "step": 12803 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.705891503171406e-06, - "loss": 0.4994, - "step": 12804 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7045272347274044e-06, - "loss": 0.3962, - "step": 12805 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7031632565098053e-06, - "loss": 0.3545, - "step": 12806 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.7017995685728736e-06, - "loss": 0.4765, - "step": 12807 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.700436170970857e-06, - "loss": 0.3949, - "step": 12808 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6990730637579945e-06, - "loss": 0.4145, - "step": 12809 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6977102469885117e-06, - "loss": 0.3955, - "step": 12810 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6963477207166257e-06, - "loss": 0.3548, - "step": 12811 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6949854849965397e-06, - "loss": 0.4852, - "step": 12812 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6936235398824418e-06, - "loss": 0.4063, - "step": 12813 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.692261885428521e-06, - "loss": 0.4362, - "step": 12814 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6909005216889394e-06, - "loss": 0.4395, - "step": 12815 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.689539448717856e-06, - "loss": 0.4215, - "step": 12816 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6881786665694165e-06, - "loss": 0.429, - "step": 12817 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.686818175297753e-06, - "loss": 0.462, - "step": 12818 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6854579749569933e-06, - "loss": 0.4358, - "step": 12819 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.684098065601246e-06, - "loss": 0.3897, - "step": 12820 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6827384472846107e-06, - "loss": 0.4991, - "step": 12821 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.681379120061178e-06, - "loss": 0.4502, - "step": 12822 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6800200839850153e-06, - "loss": 0.471, - "step": 12823 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6786613391101968e-06, - "loss": 0.5327, - "step": 12824 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6773028854907725e-06, - "loss": 0.4375, - "step": 12825 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6759447231807833e-06, - "loss": 0.4852, - "step": 12826 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.67458685223426e-06, - "loss": 0.4403, - "step": 12827 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.673229272705221e-06, - "loss": 0.4953, - "step": 12828 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6718719846476725e-06, - "loss": 0.4639, - "step": 12829 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.67051498811561e-06, - "loss": 0.3989, - "step": 12830 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.669158283163018e-06, - "loss": 0.4399, - "step": 12831 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.667801869843868e-06, - "loss": 0.3933, - "step": 12832 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6664457482121207e-06, - "loss": 0.3789, - "step": 12833 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6650899183217215e-06, - "loss": 0.4213, - "step": 12834 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6637343802266124e-06, - "loss": 0.3583, - "step": 12835 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6623791339807194e-06, - "loss": 0.4462, - "step": 12836 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6610241796379553e-06, - "loss": 0.4751, - "step": 12837 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.65966951725222e-06, - "loss": 0.453, - "step": 12838 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.658315146877404e-06, - "loss": 0.3926, - "step": 12839 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6569610685673896e-06, - "loss": 0.412, - "step": 12840 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6556072823760436e-06, - "loss": 0.4179, - "step": 12841 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.654253788357223e-06, - "loss": 0.5214, - "step": 12842 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6529005865647684e-06, - "loss": 0.4193, - "step": 12843 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.651547677052516e-06, - "loss": 0.4585, - "step": 12844 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6501950598742854e-06, - "loss": 0.504, - "step": 12845 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6488427350838864e-06, - "loss": 0.3977, - "step": 12846 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.647490702735117e-06, - "loss": 0.4321, - "step": 12847 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6461389628817626e-06, - "loss": 0.553, - "step": 12848 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6447875155775983e-06, - "loss": 0.5075, - "step": 12849 - }, - { - "epoch": 2.32, - "grad_norm": 0.0, - "learning_rate": 2.6434363608763858e-06, - "loss": 0.4135, - "step": 12850 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6420854988318755e-06, - "loss": 0.5144, - "step": 12851 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6407349294978148e-06, - "loss": 0.4286, - "step": 12852 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.639384652927922e-06, - "loss": 0.4286, - "step": 12853 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.638034669175917e-06, - "loss": 0.4293, - "step": 12854 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.636684978295506e-06, - "loss": 0.4648, - "step": 12855 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6353355803403767e-06, - "loss": 0.4116, - "step": 12856 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6339864753642177e-06, - "loss": 0.4737, - "step": 12857 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6326376634206954e-06, - "loss": 0.4201, - "step": 12858 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6312891445634668e-06, - "loss": 0.3619, - "step": 12859 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6299409188461834e-06, - "loss": 0.4759, - "step": 12860 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.628592986322469e-06, - "loss": 0.355, - "step": 12861 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.627245347045958e-06, - "loss": 0.42, - "step": 12862 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6258980010702563e-06, - "loss": 0.391, - "step": 12863 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.624550948448964e-06, - "loss": 0.399, - "step": 12864 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.623204189235671e-06, - "loss": 0.4788, - "step": 12865 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6218577234839526e-06, - "loss": 0.4395, - "step": 12866 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6205115512473724e-06, - "loss": 0.4563, - "step": 12867 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6191656725794856e-06, - "loss": 0.5329, - "step": 12868 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.617820087533831e-06, - "loss": 0.475, - "step": 12869 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.61647479616394e-06, - "loss": 0.4579, - "step": 12870 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6151297985233313e-06, - "loss": 0.4457, - "step": 12871 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6137850946655076e-06, - "loss": 0.3499, - "step": 12872 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6124406846439678e-06, - "loss": 0.4724, - "step": 12873 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6110965685121937e-06, - "loss": 0.3735, - "step": 12874 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6097527463236584e-06, - "loss": 0.4134, - "step": 12875 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6084092181318166e-06, - "loss": 0.4401, - "step": 12876 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.607065983990116e-06, - "loss": 0.4457, - "step": 12877 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6057230439519974e-06, - "loss": 0.4213, - "step": 12878 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6043803980708838e-06, - "loss": 0.501, - "step": 12879 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.603038046400187e-06, - "loss": 0.4025, - "step": 12880 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.601695988993309e-06, - "loss": 0.426, - "step": 12881 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.6003542259036376e-06, - "loss": 0.4512, - "step": 12882 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5990127571845524e-06, - "loss": 0.4835, - "step": 12883 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5976715828894174e-06, - "loss": 0.4599, - "step": 12884 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.596330703071589e-06, - "loss": 0.5694, - "step": 12885 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5949901177844073e-06, - "loss": 0.4608, - "step": 12886 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.593649827081205e-06, - "loss": 0.4202, - "step": 12887 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5923098310152995e-06, - "loss": 0.5393, - "step": 12888 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.590970129639996e-06, - "loss": 0.4803, - "step": 12889 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5896307230085994e-06, - "loss": 0.4344, - "step": 12890 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.588291611174385e-06, - "loss": 0.5118, - "step": 12891 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.586952794190627e-06, - "loss": 0.4779, - "step": 12892 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5856142721105848e-06, - "loss": 0.4965, - "step": 12893 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.584276044987507e-06, - "loss": 0.4181, - "step": 12894 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5829381128746344e-06, - "loss": 0.4073, - "step": 12895 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.58160047582519e-06, - "loss": 0.4028, - "step": 12896 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.580263133892389e-06, - "loss": 0.4201, - "step": 12897 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.57892608712943e-06, - "loss": 0.4282, - "step": 12898 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5775893355895e-06, - "loss": 0.4017, - "step": 12899 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5762528793257846e-06, - "loss": 0.3668, - "step": 12900 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5749167183914482e-06, - "loss": 0.3938, - "step": 12901 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5735808528396432e-06, - "loss": 0.4452, - "step": 12902 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5722452827235155e-06, - "loss": 0.4645, - "step": 12903 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5709100080961934e-06, - "loss": 0.5089, - "step": 12904 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.569575029010799e-06, - "loss": 0.345, - "step": 12905 - }, - { - "epoch": 2.33, - "grad_norm": 0.0, - "learning_rate": 2.5682403455204386e-06, - "loss": 0.4022, - "step": 12906 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5669059576782085e-06, - "loss": 0.4788, - "step": 12907 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.565571865537193e-06, - "loss": 0.3996, - "step": 12908 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5642380691504655e-06, - "loss": 0.4437, - "step": 12909 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.562904568571082e-06, - "loss": 0.455, - "step": 12910 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.561571363852098e-06, - "loss": 0.3938, - "step": 12911 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5602384550465478e-06, - "loss": 0.4159, - "step": 12912 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.55890584220746e-06, - "loss": 0.3582, - "step": 12913 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.557573525387842e-06, - "loss": 0.5101, - "step": 12914 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.556241504640696e-06, - "loss": 0.4723, - "step": 12915 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5549097800190182e-06, - "loss": 0.4241, - "step": 12916 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.553578351575783e-06, - "loss": 0.4946, - "step": 12917 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.552247219363958e-06, - "loss": 0.3511, - "step": 12918 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.550916383436497e-06, - "loss": 0.4247, - "step": 12919 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5495858438463427e-06, - "loss": 0.4281, - "step": 12920 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5482556006464277e-06, - "loss": 0.433, - "step": 12921 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.546925653889669e-06, - "loss": 0.4017, - "step": 12922 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5455960036289773e-06, - "loss": 0.3966, - "step": 12923 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.544266649917245e-06, - "loss": 0.5123, - "step": 12924 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5429375928073594e-06, - "loss": 0.505, - "step": 12925 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.541608832352187e-06, - "loss": 0.4802, - "step": 12926 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.540280368604595e-06, - "loss": 0.3848, - "step": 12927 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5389522016174327e-06, - "loss": 0.4944, - "step": 12928 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5376243314435314e-06, - "loss": 0.43, - "step": 12929 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5362967581357155e-06, - "loss": 0.4932, - "step": 12930 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5349694817467997e-06, - "loss": 0.412, - "step": 12931 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5336425023295886e-06, - "loss": 0.4071, - "step": 12932 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.532315819936869e-06, - "loss": 0.4421, - "step": 12933 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5309894346214194e-06, - "loss": 0.3941, - "step": 12934 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5296633464360076e-06, - "loss": 0.4188, - "step": 12935 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.528337555433379e-06, - "loss": 0.5081, - "step": 12936 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5270120616662864e-06, - "loss": 0.4829, - "step": 12937 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5256868651874542e-06, - "loss": 0.4293, - "step": 12938 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.524361966049604e-06, - "loss": 0.4252, - "step": 12939 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5230373643054408e-06, - "loss": 0.4422, - "step": 12940 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.52171306000766e-06, - "loss": 0.4104, - "step": 12941 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.520389053208945e-06, - "loss": 0.4202, - "step": 12942 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.519065343961967e-06, - "loss": 0.4967, - "step": 12943 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.517741932319384e-06, - "loss": 0.3965, - "step": 12944 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5164188183338466e-06, - "loss": 0.416, - "step": 12945 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5150960020579874e-06, - "loss": 0.471, - "step": 12946 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5137734835444317e-06, - "loss": 0.5022, - "step": 12947 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5124512628457888e-06, - "loss": 0.4631, - "step": 12948 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.511129340014664e-06, - "loss": 0.5133, - "step": 12949 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.509807715103646e-06, - "loss": 0.3913, - "step": 12950 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5084863881653065e-06, - "loss": 0.4202, - "step": 12951 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.507165359252212e-06, - "loss": 0.393, - "step": 12952 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.505844628416911e-06, - "loss": 0.4908, - "step": 12953 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5045241957119538e-06, - "loss": 0.383, - "step": 12954 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5032040611898635e-06, - "loss": 0.4147, - "step": 12955 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5018842249031583e-06, - "loss": 0.4034, - "step": 12956 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.5005646869043443e-06, - "loss": 0.4093, - "step": 12957 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.499245447245914e-06, - "loss": 0.4392, - "step": 12958 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.4979265059803493e-06, - "loss": 0.4595, - "step": 12959 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.49660786316012e-06, - "loss": 0.3935, - "step": 12960 - }, - { - "epoch": 2.34, - "grad_norm": 0.0, - "learning_rate": 2.4952895188376838e-06, - "loss": 0.4218, - "step": 12961 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.493971473065486e-06, - "loss": 0.5063, - "step": 12962 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4926537258959627e-06, - "loss": 0.4408, - "step": 12963 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.491336277381532e-06, - "loss": 0.467, - "step": 12964 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.49001912757461e-06, - "loss": 0.3857, - "step": 12965 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4887022765275946e-06, - "loss": 0.4327, - "step": 12966 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.487385724292869e-06, - "loss": 0.4034, - "step": 12967 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4860694709228075e-06, - "loss": 0.4035, - "step": 12968 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4847535164697724e-06, - "loss": 0.425, - "step": 12969 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4834378609861186e-06, - "loss": 0.4077, - "step": 12970 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.482122504524185e-06, - "loss": 0.3678, - "step": 12971 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4808074471362953e-06, - "loss": 0.4201, - "step": 12972 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4794926888747706e-06, - "loss": 0.3933, - "step": 12973 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4781782297919033e-06, - "loss": 0.4301, - "step": 12974 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4768640699399948e-06, - "loss": 0.4502, - "step": 12975 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4755502093713225e-06, - "loss": 0.4214, - "step": 12976 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.474236648138152e-06, - "loss": 0.4987, - "step": 12977 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4729233862927405e-06, - "loss": 0.4893, - "step": 12978 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4716104238873305e-06, - "loss": 0.3852, - "step": 12979 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.470297760974155e-06, - "loss": 0.4519, - "step": 12980 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4689853976054336e-06, - "loss": 0.4191, - "step": 12981 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.467673333833375e-06, - "loss": 0.4668, - "step": 12982 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.466361569710174e-06, - "loss": 0.416, - "step": 12983 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.465050105288016e-06, - "loss": 0.4221, - "step": 12984 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4637389406190727e-06, - "loss": 0.5086, - "step": 12985 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.462428075755502e-06, - "loss": 0.4133, - "step": 12986 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.461117510749458e-06, - "loss": 0.5131, - "step": 12987 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4598072456530764e-06, - "loss": 0.3863, - "step": 12988 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4584972805184783e-06, - "loss": 0.447, - "step": 12989 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.457187615397776e-06, - "loss": 0.4235, - "step": 12990 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.45587825034307e-06, - "loss": 0.4605, - "step": 12991 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4545691854064535e-06, - "loss": 0.4365, - "step": 12992 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4532604206400014e-06, - "loss": 0.52, - "step": 12993 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.451951956095777e-06, - "loss": 0.5011, - "step": 12994 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.450643791825835e-06, - "loss": 0.4072, - "step": 12995 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.449335927882216e-06, - "loss": 0.4462, - "step": 12996 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.448028364316948e-06, - "loss": 0.4119, - "step": 12997 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.446721101182049e-06, - "loss": 0.4206, - "step": 12998 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.445414138529525e-06, - "loss": 0.4126, - "step": 12999 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.444107476411368e-06, - "loss": 0.5128, - "step": 13000 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4428011148795596e-06, - "loss": 0.4717, - "step": 13001 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.441495053986065e-06, - "loss": 0.4591, - "step": 13002 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.440189293782852e-06, - "loss": 0.5273, - "step": 13003 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.438883834321857e-06, - "loss": 0.5071, - "step": 13004 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4375786756550157e-06, - "loss": 0.4428, - "step": 13005 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4362738178342504e-06, - "loss": 0.4134, - "step": 13006 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4349692609114674e-06, - "loss": 0.424, - "step": 13007 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4336650049385678e-06, - "loss": 0.481, - "step": 13008 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4323610499674377e-06, - "loss": 0.4275, - "step": 13009 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4310573960499495e-06, - "loss": 0.4307, - "step": 13010 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.429754043237966e-06, - "loss": 0.4086, - "step": 13011 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.42845099158333e-06, - "loss": 0.3982, - "step": 13012 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4271482411378876e-06, - "loss": 0.4632, - "step": 13013 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.4258457919534604e-06, - "loss": 0.4207, - "step": 13014 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.424543644081864e-06, - "loss": 0.3922, - "step": 13015 - }, - { - "epoch": 2.35, - "grad_norm": 0.0, - "learning_rate": 2.423241797574898e-06, - "loss": 0.402, - "step": 13016 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.421940252484353e-06, - "loss": 0.4711, - "step": 13017 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.420639008862007e-06, - "loss": 0.4677, - "step": 13018 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4193380667596243e-06, - "loss": 0.3942, - "step": 13019 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.418037426228962e-06, - "loss": 0.4521, - "step": 13020 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.416737087321758e-06, - "loss": 0.4793, - "step": 13021 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.415437050089743e-06, - "loss": 0.4709, - "step": 13022 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.414137314584636e-06, - "loss": 0.5112, - "step": 13023 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4128378808581386e-06, - "loss": 0.4038, - "step": 13024 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4115387489619512e-06, - "loss": 0.4011, - "step": 13025 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4102399189477553e-06, - "loss": 0.4481, - "step": 13026 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4089413908672143e-06, - "loss": 0.4669, - "step": 13027 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4076431647719887e-06, - "loss": 0.4233, - "step": 13028 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.406345240713721e-06, - "loss": 0.4283, - "step": 13029 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.405047618744052e-06, - "loss": 0.3653, - "step": 13030 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.4037502989146e-06, - "loss": 0.495, - "step": 13031 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.402453281276973e-06, - "loss": 0.4022, - "step": 13032 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.401156565882771e-06, - "loss": 0.393, - "step": 13033 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3998601527835774e-06, - "loss": 0.4323, - "step": 13034 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3985640420309663e-06, - "loss": 0.3902, - "step": 13035 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.397268233676501e-06, - "loss": 0.3859, - "step": 13036 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.395972727771728e-06, - "loss": 0.4464, - "step": 13037 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3946775243681873e-06, - "loss": 0.3688, - "step": 13038 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.393382623517404e-06, - "loss": 0.3826, - "step": 13039 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3920880252708867e-06, - "loss": 0.4601, - "step": 13040 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3907937296801476e-06, - "loss": 0.4316, - "step": 13041 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3894997367966655e-06, - "loss": 0.5168, - "step": 13042 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3882060466719216e-06, - "loss": 0.4486, - "step": 13043 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3869126593573812e-06, - "loss": 0.3644, - "step": 13044 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.385619574904493e-06, - "loss": 0.3835, - "step": 13045 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3843267933647064e-06, - "loss": 0.4156, - "step": 13046 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.383034314789446e-06, - "loss": 0.4316, - "step": 13047 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.38174213923013e-06, - "loss": 0.3813, - "step": 13048 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3804502667381648e-06, - "loss": 0.4471, - "step": 13049 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.379158697364934e-06, - "loss": 0.5889, - "step": 13050 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.37786743116183e-06, - "loss": 0.4098, - "step": 13051 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.376576468180216e-06, - "loss": 0.4941, - "step": 13052 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.37528580847145e-06, - "loss": 0.4047, - "step": 13053 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.373995452086877e-06, - "loss": 0.4755, - "step": 13054 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3727053990778283e-06, - "loss": 0.4668, - "step": 13055 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3714156494956254e-06, - "loss": 0.5446, - "step": 13056 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.370126203391576e-06, - "loss": 0.4487, - "step": 13057 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3688370608169775e-06, - "loss": 0.4241, - "step": 13058 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3675482218231137e-06, - "loss": 0.4542, - "step": 13059 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3662596864612562e-06, - "loss": 0.4099, - "step": 13060 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.364971454782663e-06, - "loss": 0.4258, - "step": 13061 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.363683526838588e-06, - "loss": 0.5525, - "step": 13062 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3623959026802634e-06, - "loss": 0.4455, - "step": 13063 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3611085823589177e-06, - "loss": 0.4263, - "step": 13064 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.359821565925755e-06, - "loss": 0.5088, - "step": 13065 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3585348534319763e-06, - "loss": 0.5144, - "step": 13066 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3572484449287746e-06, - "loss": 0.4562, - "step": 13067 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3559623404673214e-06, - "loss": 0.5084, - "step": 13068 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.3546765400987824e-06, - "loss": 0.3693, - "step": 13069 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.353391043874308e-06, - "loss": 0.4358, - "step": 13070 - }, - { - "epoch": 2.36, - "grad_norm": 0.0, - "learning_rate": 2.352105851845037e-06, - "loss": 0.3754, - "step": 13071 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3508209640620962e-06, - "loss": 0.5029, - "step": 13072 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3495363805766015e-06, - "loss": 0.4338, - "step": 13073 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3482521014396552e-06, - "loss": 0.4053, - "step": 13074 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.346968126702349e-06, - "loss": 0.4606, - "step": 13075 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3456844564157612e-06, - "loss": 0.4107, - "step": 13076 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.344401090630959e-06, - "loss": 0.4849, - "step": 13077 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.343118029398992e-06, - "loss": 0.4394, - "step": 13078 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3418352727709124e-06, - "loss": 0.4093, - "step": 13079 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3405528207977424e-06, - "loss": 0.4597, - "step": 13080 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3392706735305027e-06, - "loss": 0.4794, - "step": 13081 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3379888310201993e-06, - "loss": 0.3996, - "step": 13082 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3367072933178225e-06, - "loss": 0.4451, - "step": 13083 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3354260604743615e-06, - "loss": 0.4361, - "step": 13084 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3341451325407815e-06, - "loss": 0.4396, - "step": 13085 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3328645095680402e-06, - "loss": 0.3378, - "step": 13086 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3315841916070856e-06, - "loss": 0.3928, - "step": 13087 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.330304178708843e-06, - "loss": 0.4189, - "step": 13088 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3290244709242426e-06, - "loss": 0.512, - "step": 13089 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.327745068304189e-06, - "loss": 0.3866, - "step": 13090 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.326465970899581e-06, - "loss": 0.4643, - "step": 13091 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.325187178761301e-06, - "loss": 0.4444, - "step": 13092 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.323908691940224e-06, - "loss": 0.4658, - "step": 13093 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.322630510487208e-06, - "loss": 0.4704, - "step": 13094 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3213526344531036e-06, - "loss": 0.4491, - "step": 13095 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.320075063888745e-06, - "loss": 0.5414, - "step": 13096 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.318797798844956e-06, - "loss": 0.4957, - "step": 13097 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3175208393725502e-06, - "loss": 0.5099, - "step": 13098 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.316244185522323e-06, - "loss": 0.4557, - "step": 13099 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.314967837345068e-06, - "loss": 0.3605, - "step": 13100 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3136917948915573e-06, - "loss": 0.5061, - "step": 13101 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3124160582125575e-06, - "loss": 0.3932, - "step": 13102 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.311140627358813e-06, - "loss": 0.4836, - "step": 13103 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3098655023810646e-06, - "loss": 0.4376, - "step": 13104 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.308590683330042e-06, - "loss": 0.3888, - "step": 13105 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3073161702564595e-06, - "loss": 0.4508, - "step": 13106 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3060419632110165e-06, - "loss": 0.5177, - "step": 13107 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3047680622444058e-06, - "loss": 0.4012, - "step": 13108 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3034944674073034e-06, - "loss": 0.5143, - "step": 13109 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.302221178750377e-06, - "loss": 0.4442, - "step": 13110 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.3009481963242787e-06, - "loss": 0.4532, - "step": 13111 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2996755201796497e-06, - "loss": 0.5542, - "step": 13112 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.29840315036712e-06, - "loss": 0.4189, - "step": 13113 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2971310869373075e-06, - "loss": 0.4972, - "step": 13114 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.295859329940815e-06, - "loss": 0.411, - "step": 13115 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.294587879428234e-06, - "loss": 0.4259, - "step": 13116 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.293316735450153e-06, - "loss": 0.4404, - "step": 13117 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.292045898057131e-06, - "loss": 0.4762, - "step": 13118 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2907753672997292e-06, - "loss": 0.4755, - "step": 13119 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.289505143228489e-06, - "loss": 0.4196, - "step": 13120 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.28823522589394e-06, - "loss": 0.4773, - "step": 13121 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.286965615346608e-06, - "loss": 0.4962, - "step": 13122 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.285696311636997e-06, - "loss": 0.4051, - "step": 13123 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2844273148156015e-06, - "loss": 0.4651, - "step": 13124 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2831586249329075e-06, - "loss": 0.4495, - "step": 13125 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.2818902420393774e-06, - "loss": 0.4406, - "step": 13126 - }, - { - "epoch": 2.37, - "grad_norm": 0.0, - "learning_rate": 2.280622166185479e-06, - "loss": 0.4883, - "step": 13127 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.279354397421655e-06, - "loss": 0.4932, - "step": 13128 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2780869357983383e-06, - "loss": 0.4356, - "step": 13129 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.276819781365952e-06, - "loss": 0.4704, - "step": 13130 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.275552934174906e-06, - "loss": 0.427, - "step": 13131 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2742863942755965e-06, - "loss": 0.3696, - "step": 13132 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2730201617184087e-06, - "loss": 0.5273, - "step": 13133 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.271754236553716e-06, - "loss": 0.5544, - "step": 13134 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2704886188318796e-06, - "loss": 0.513, - "step": 13135 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2692233086032468e-06, - "loss": 0.4954, - "step": 13136 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.267958305918152e-06, - "loss": 0.4391, - "step": 13137 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2666936108269245e-06, - "loss": 0.3537, - "step": 13138 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2654292233798724e-06, - "loss": 0.4051, - "step": 13139 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.264165143627299e-06, - "loss": 0.3847, - "step": 13140 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.262901371619486e-06, - "loss": 0.4765, - "step": 13141 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2616379074067063e-06, - "loss": 0.4667, - "step": 13142 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2603747510392316e-06, - "loss": 0.491, - "step": 13143 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2591119025673082e-06, - "loss": 0.4629, - "step": 13144 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2578493620411733e-06, - "loss": 0.4515, - "step": 13145 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2565871295110543e-06, - "loss": 0.429, - "step": 13146 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2553252050271636e-06, - "loss": 0.4245, - "step": 13147 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2540635886397045e-06, - "loss": 0.4795, - "step": 13148 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2528022803988658e-06, - "loss": 0.4372, - "step": 13149 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.251541280354823e-06, - "loss": 0.4095, - "step": 13150 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2502805885577426e-06, - "loss": 0.4763, - "step": 13151 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.249020205057776e-06, - "loss": 0.4814, - "step": 13152 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.247760129905061e-06, - "loss": 0.432, - "step": 13153 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.246500363149732e-06, - "loss": 0.4804, - "step": 13154 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2452409048419023e-06, - "loss": 0.4313, - "step": 13155 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2439817550316723e-06, - "loss": 0.3888, - "step": 13156 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2427229137691353e-06, - "loss": 0.4424, - "step": 13157 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.241464381104366e-06, - "loss": 0.4118, - "step": 13158 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.240206157087438e-06, - "loss": 0.3816, - "step": 13159 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2389482417684014e-06, - "loss": 0.3426, - "step": 13160 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2376906351973004e-06, - "loss": 0.3741, - "step": 13161 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2364333374241666e-06, - "loss": 0.482, - "step": 13162 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2351763484990074e-06, - "loss": 0.4553, - "step": 13163 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.233919668471839e-06, - "loss": 0.4148, - "step": 13164 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.232663297392649e-06, - "loss": 0.4375, - "step": 13165 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.23140723531142e-06, - "loss": 0.4227, - "step": 13166 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.230151482278119e-06, - "loss": 0.4776, - "step": 13167 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.228896038342703e-06, - "loss": 0.436, - "step": 13168 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.227640903555115e-06, - "loss": 0.4051, - "step": 13169 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.226386077965287e-06, - "loss": 0.4048, - "step": 13170 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2251315616231373e-06, - "loss": 0.3498, - "step": 13171 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.223877354578573e-06, - "loss": 0.4135, - "step": 13172 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.22262345688149e-06, - "loss": 0.4455, - "step": 13173 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2213698685817676e-06, - "loss": 0.491, - "step": 13174 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2201165897292755e-06, - "loss": 0.4478, - "step": 13175 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2188636203738755e-06, - "loss": 0.3874, - "step": 13176 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2176109605654107e-06, - "loss": 0.4283, - "step": 13177 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2163586103537172e-06, - "loss": 0.4479, - "step": 13178 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2151065697886086e-06, - "loss": 0.3954, - "step": 13179 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2138548389198955e-06, - "loss": 0.4984, - "step": 13180 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2126034177973767e-06, - "loss": 0.3769, - "step": 13181 - }, - { - "epoch": 2.38, - "grad_norm": 0.0, - "learning_rate": 2.2113523064708352e-06, - "loss": 0.4469, - "step": 13182 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.210101504990041e-06, - "loss": 0.4066, - "step": 13183 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.208851013404755e-06, - "loss": 0.3841, - "step": 13184 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.2076008317647212e-06, - "loss": 0.461, - "step": 13185 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.2063509601196766e-06, - "loss": 0.5052, - "step": 13186 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.2051013985193414e-06, - "loss": 0.383, - "step": 13187 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.2038521470134254e-06, - "loss": 0.4831, - "step": 13188 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.202603205651628e-06, - "loss": 0.4942, - "step": 13189 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.2013545744836316e-06, - "loss": 0.4155, - "step": 13190 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.200106253559108e-06, - "loss": 0.3828, - "step": 13191 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1988582429277215e-06, - "loss": 0.4805, - "step": 13192 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1976105426391214e-06, - "loss": 0.4213, - "step": 13193 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1963631527429373e-06, - "loss": 0.3937, - "step": 13194 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1951160732887964e-06, - "loss": 0.4138, - "step": 13195 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.193869304326305e-06, - "loss": 0.4252, - "step": 13196 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.192622845905068e-06, - "loss": 0.417, - "step": 13197 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1913766980746688e-06, - "loss": 0.5185, - "step": 13198 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1901308608846816e-06, - "loss": 0.4641, - "step": 13199 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1888853343846706e-06, - "loss": 0.3971, - "step": 13200 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.187640118624177e-06, - "loss": 0.4531, - "step": 13201 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1863952136527454e-06, - "loss": 0.4438, - "step": 13202 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.185150619519897e-06, - "loss": 0.4228, - "step": 13203 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1839063362751456e-06, - "loss": 0.3928, - "step": 13204 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.182662363967989e-06, - "loss": 0.549, - "step": 13205 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.181418702647916e-06, - "loss": 0.439, - "step": 13206 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.180175352364401e-06, - "loss": 0.3645, - "step": 13207 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.178932313166906e-06, - "loss": 0.4167, - "step": 13208 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.177689585104882e-06, - "loss": 0.5001, - "step": 13209 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1764471682277668e-06, - "loss": 0.4484, - "step": 13210 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1752050625849854e-06, - "loss": 0.4736, - "step": 13211 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1739632682259516e-06, - "loss": 0.4192, - "step": 13212 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1727217852000625e-06, - "loss": 0.4265, - "step": 13213 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1714806135567134e-06, - "loss": 0.4443, - "step": 13214 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.170239753345279e-06, - "loss": 0.4424, - "step": 13215 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1689992046151166e-06, - "loss": 0.3959, - "step": 13216 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.167758967415582e-06, - "loss": 0.3877, - "step": 13217 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.16651904179601e-06, - "loss": 0.4696, - "step": 13218 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.165279427805733e-06, - "loss": 0.4392, - "step": 13219 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1640401254940613e-06, - "loss": 0.4993, - "step": 13220 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.162801134910298e-06, - "loss": 0.423, - "step": 13221 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1615624561037306e-06, - "loss": 0.4402, - "step": 13222 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1603240891236375e-06, - "loss": 0.4405, - "step": 13223 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.159086034019282e-06, - "loss": 0.411, - "step": 13224 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1578482908399157e-06, - "loss": 0.4296, - "step": 13225 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.15661085963478e-06, - "loss": 0.525, - "step": 13226 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1553737404530993e-06, - "loss": 0.4465, - "step": 13227 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.154136933344091e-06, - "loss": 0.42, - "step": 13228 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.152900438356953e-06, - "loss": 0.5108, - "step": 13229 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1516642555408806e-06, - "loss": 0.4509, - "step": 13230 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1504283849450523e-06, - "loss": 0.4456, - "step": 13231 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.149192826618628e-06, - "loss": 0.4695, - "step": 13232 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.147957580610761e-06, - "loss": 0.4035, - "step": 13233 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1467226469705904e-06, - "loss": 0.3854, - "step": 13234 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1454880257472488e-06, - "loss": 0.4717, - "step": 13235 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1442537169898492e-06, - "loss": 0.4133, - "step": 13236 - }, - { - "epoch": 2.39, - "grad_norm": 0.0, - "learning_rate": 2.1430197207474947e-06, - "loss": 0.4143, - "step": 13237 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1417860370692776e-06, - "loss": 0.4781, - "step": 13238 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1405526660042685e-06, - "loss": 0.4106, - "step": 13239 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.139319607601542e-06, - "loss": 0.3461, - "step": 13240 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.138086861910147e-06, - "loss": 0.3972, - "step": 13241 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.136854428979125e-06, - "loss": 0.4476, - "step": 13242 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1356223088575035e-06, - "loss": 0.4484, - "step": 13243 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.134390501594299e-06, - "loss": 0.4605, - "step": 13244 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.133159007238512e-06, - "loss": 0.4525, - "step": 13245 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1319278258391427e-06, - "loss": 0.4735, - "step": 13246 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1306969574451595e-06, - "loss": 0.4339, - "step": 13247 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1294664021055335e-06, - "loss": 0.5384, - "step": 13248 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1282361598692157e-06, - "loss": 0.4772, - "step": 13249 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.127006230785147e-06, - "loss": 0.4406, - "step": 13250 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1257766149022596e-06, - "loss": 0.3754, - "step": 13251 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.124547312269468e-06, - "loss": 0.4817, - "step": 13252 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.123318322935678e-06, - "loss": 0.4973, - "step": 13253 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1220896469497767e-06, - "loss": 0.4532, - "step": 13254 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1208612843606413e-06, - "loss": 0.5429, - "step": 13255 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1196332352171445e-06, - "loss": 0.4226, - "step": 13256 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.118405499568138e-06, - "loss": 0.3701, - "step": 13257 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.117178077462463e-06, - "loss": 0.408, - "step": 13258 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1159509689489464e-06, - "loss": 0.4524, - "step": 13259 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.114724174076408e-06, - "loss": 0.4707, - "step": 13260 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.11349769289365e-06, - "loss": 0.4147, - "step": 13261 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.112271525449463e-06, - "loss": 0.4468, - "step": 13262 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.111045671792629e-06, - "loss": 0.4928, - "step": 13263 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1098201319719114e-06, - "loss": 0.3934, - "step": 13264 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1085949060360654e-06, - "loss": 0.4778, - "step": 13265 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.107369994033833e-06, - "loss": 0.4724, - "step": 13266 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1061453960139413e-06, - "loss": 0.4557, - "step": 13267 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1049211120251123e-06, - "loss": 0.4221, - "step": 13268 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1036971421160447e-06, - "loss": 0.3909, - "step": 13269 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.102473486335432e-06, - "loss": 0.458, - "step": 13270 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1012501447319535e-06, - "loss": 0.435, - "step": 13271 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.1000271173542718e-06, - "loss": 0.4369, - "step": 13272 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.098804404251049e-06, - "loss": 0.3787, - "step": 13273 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.097582005470922e-06, - "loss": 0.4799, - "step": 13274 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0963599210625197e-06, - "loss": 0.4609, - "step": 13275 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0951381510744627e-06, - "loss": 0.4202, - "step": 13276 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.093916695555346e-06, - "loss": 0.4361, - "step": 13277 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.09269555455377e-06, - "loss": 0.5343, - "step": 13278 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.091474728118311e-06, - "loss": 0.4901, - "step": 13279 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.090254216297535e-06, - "loss": 0.3803, - "step": 13280 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0890340191399973e-06, - "loss": 0.4706, - "step": 13281 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0878141366942373e-06, - "loss": 0.4616, - "step": 13282 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.086594569008783e-06, - "loss": 0.4586, - "step": 13283 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.085375316132159e-06, - "loss": 0.433, - "step": 13284 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0841563781128606e-06, - "loss": 0.4157, - "step": 13285 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.082937754999381e-06, - "loss": 0.4509, - "step": 13286 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.081719446840201e-06, - "loss": 0.4478, - "step": 13287 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.080501453683784e-06, - "loss": 0.4046, - "step": 13288 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0792837755785877e-06, - "loss": 0.3931, - "step": 13289 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.078066412573053e-06, - "loss": 0.4692, - "step": 13290 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0768493647156095e-06, - "loss": 0.4424, - "step": 13291 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0756326320546682e-06, - "loss": 0.4166, - "step": 13292 - }, - { - "epoch": 2.4, - "grad_norm": 0.0, - "learning_rate": 2.0744162146386338e-06, - "loss": 0.3837, - "step": 13293 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.073200112515902e-06, - "loss": 0.4614, - "step": 13294 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0719843257348494e-06, - "loss": 0.4295, - "step": 13295 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.070768854343842e-06, - "loss": 0.4131, - "step": 13296 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0695536983912323e-06, - "loss": 0.3571, - "step": 13297 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0683388579253627e-06, - "loss": 0.4518, - "step": 13298 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0671243329945614e-06, - "loss": 0.474, - "step": 13299 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0659101236471433e-06, - "loss": 0.3874, - "step": 13300 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0646962299314123e-06, - "loss": 0.4676, - "step": 13301 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0634826518956595e-06, - "loss": 0.5145, - "step": 13302 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.062269389588163e-06, - "loss": 0.35, - "step": 13303 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.061056443057188e-06, - "loss": 0.4383, - "step": 13304 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0598438123509845e-06, - "loss": 0.5143, - "step": 13305 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.058631497517802e-06, - "loss": 0.4483, - "step": 13306 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0574194986058617e-06, - "loss": 0.4856, - "step": 13307 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.056207815663378e-06, - "loss": 0.4689, - "step": 13308 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0549964487385577e-06, - "loss": 0.4584, - "step": 13309 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0537853978795853e-06, - "loss": 0.4707, - "step": 13310 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0525746631346454e-06, - "loss": 0.4929, - "step": 13311 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0513642445519e-06, - "loss": 0.4109, - "step": 13312 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0501541421795025e-06, - "loss": 0.4195, - "step": 13313 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.048944356065593e-06, - "loss": 0.4304, - "step": 13314 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0477348862582947e-06, - "loss": 0.4803, - "step": 13315 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0465257328057265e-06, - "loss": 0.4637, - "step": 13316 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.045316895755991e-06, - "loss": 0.5295, - "step": 13317 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0441083751571755e-06, - "loss": 0.4341, - "step": 13318 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0429001710573592e-06, - "loss": 0.4603, - "step": 13319 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0416922835046037e-06, - "loss": 0.3413, - "step": 13320 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.040484712546964e-06, - "loss": 0.4318, - "step": 13321 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.039277458232477e-06, - "loss": 0.4496, - "step": 13322 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.038070520609171e-06, - "loss": 0.3973, - "step": 13323 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.036863899725059e-06, - "loss": 0.4349, - "step": 13324 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.035657595628142e-06, - "loss": 0.4548, - "step": 13325 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0344516083664078e-06, - "loss": 0.4448, - "step": 13326 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0332459379878356e-06, - "loss": 0.4489, - "step": 13327 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.032040584540389e-06, - "loss": 0.4251, - "step": 13328 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0308355480720197e-06, - "loss": 0.4725, - "step": 13329 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.029630828630663e-06, - "loss": 0.4797, - "step": 13330 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0284264262642415e-06, - "loss": 0.4437, - "step": 13331 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0272223410206772e-06, - "loss": 0.4121, - "step": 13332 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0260185729478652e-06, - "loss": 0.481, - "step": 13333 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0248151220936942e-06, - "loss": 0.4321, - "step": 13334 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.023611988506041e-06, - "loss": 0.4426, - "step": 13335 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0224091722327665e-06, - "loss": 0.4283, - "step": 13336 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0212066733217215e-06, - "loss": 0.4218, - "step": 13337 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.020004491820744e-06, - "loss": 0.4351, - "step": 13338 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0188026277776575e-06, - "loss": 0.4416, - "step": 13339 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0176010812402757e-06, - "loss": 0.3702, - "step": 13340 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0163998522563975e-06, - "loss": 0.4946, - "step": 13341 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.015198940873807e-06, - "loss": 0.4316, - "step": 13342 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0139983471402836e-06, - "loss": 0.4025, - "step": 13343 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.01279807110359e-06, - "loss": 0.4088, - "step": 13344 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0115981128114693e-06, - "loss": 0.465, - "step": 13345 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0103984723116597e-06, - "loss": 0.4044, - "step": 13346 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.0091991496518837e-06, - "loss": 0.3843, - "step": 13347 - }, - { - "epoch": 2.41, - "grad_norm": 0.0, - "learning_rate": 2.008000144879857e-06, - "loss": 0.4939, - "step": 13348 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.006801458043276e-06, - "loss": 0.4771, - "step": 13349 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.005603089189826e-06, - "loss": 0.4065, - "step": 13350 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.004405038367181e-06, - "loss": 0.4112, - "step": 13351 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.0032073056229994e-06, - "loss": 0.4194, - "step": 13352 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.0020098910049315e-06, - "loss": 0.4826, - "step": 13353 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 2.0008127945606105e-06, - "loss": 0.4379, - "step": 13354 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.999616016337661e-06, - "loss": 0.4325, - "step": 13355 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9984195563836907e-06, - "loss": 0.4725, - "step": 13356 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.997223414746299e-06, - "loss": 0.4758, - "step": 13357 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9960275914730686e-06, - "loss": 0.4218, - "step": 13358 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9948320866115723e-06, - "loss": 0.3828, - "step": 13359 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.993636900209369e-06, - "loss": 0.4426, - "step": 13360 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.992442032314006e-06, - "loss": 0.4306, - "step": 13361 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9912474829730166e-06, - "loss": 0.5159, - "step": 13362 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.990053252233921e-06, - "loss": 0.3933, - "step": 13363 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.988859340144227e-06, - "loss": 0.4645, - "step": 13364 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.987665746751436e-06, - "loss": 0.3979, - "step": 13365 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.986472472103026e-06, - "loss": 0.5003, - "step": 13366 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9852795162464723e-06, - "loss": 0.3639, - "step": 13367 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9840868792292267e-06, - "loss": 0.4666, - "step": 13368 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9828945610987337e-06, - "loss": 0.4238, - "step": 13369 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.981702561902432e-06, - "loss": 0.3962, - "step": 13370 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9805108816877394e-06, - "loss": 0.4535, - "step": 13371 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.979319520502062e-06, - "loss": 0.398, - "step": 13372 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9781284783927932e-06, - "loss": 0.4169, - "step": 13373 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9769377554073165e-06, - "loss": 0.46, - "step": 13374 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9757473515930005e-06, - "loss": 0.3765, - "step": 13375 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9745572669972004e-06, - "loss": 0.5201, - "step": 13376 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9733675016672606e-06, - "loss": 0.4673, - "step": 13377 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.972178055650512e-06, - "loss": 0.3732, - "step": 13378 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.970988928994274e-06, - "loss": 0.4956, - "step": 13379 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.969800121745846e-06, - "loss": 0.4098, - "step": 13380 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9686116339525307e-06, - "loss": 0.4166, - "step": 13381 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.967423465661604e-06, - "loss": 0.4198, - "step": 13382 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.96623561692033e-06, - "loss": 0.4754, - "step": 13383 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.965048087775967e-06, - "loss": 0.4717, - "step": 13384 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9638608782757517e-06, - "loss": 0.4378, - "step": 13385 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.962673988466921e-06, - "loss": 0.4978, - "step": 13386 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9614874183966868e-06, - "loss": 0.4772, - "step": 13387 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.960301168112254e-06, - "loss": 0.3957, - "step": 13388 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9591152376608137e-06, - "loss": 0.4596, - "step": 13389 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9579296270895444e-06, - "loss": 0.519, - "step": 13390 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9567443364456106e-06, - "loss": 0.4373, - "step": 13391 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.955559365776166e-06, - "loss": 0.5391, - "step": 13392 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.954374715128352e-06, - "loss": 0.4295, - "step": 13393 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9531903845492937e-06, - "loss": 0.4328, - "step": 13394 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9520063740861074e-06, - "loss": 0.435, - "step": 13395 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9508226837858935e-06, - "loss": 0.4771, - "step": 13396 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9496393136957424e-06, - "loss": 0.469, - "step": 13397 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9484562638627313e-06, - "loss": 0.4428, - "step": 13398 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.947273534333923e-06, - "loss": 0.4184, - "step": 13399 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9460911251563684e-06, - "loss": 0.4804, - "step": 13400 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.944909036377105e-06, - "loss": 0.3937, - "step": 13401 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9437272680431584e-06, - "loss": 0.4151, - "step": 13402 - }, - { - "epoch": 2.42, - "grad_norm": 0.0, - "learning_rate": 1.9425458202015437e-06, - "loss": 0.4249, - "step": 13403 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9413646928992593e-06, - "loss": 0.4163, - "step": 13404 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9401838861832957e-06, - "loss": 0.3787, - "step": 13405 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.939003400100622e-06, - "loss": 0.4375, - "step": 13406 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9378232346981996e-06, - "loss": 0.4445, - "step": 13407 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.936643390022982e-06, - "loss": 0.419, - "step": 13408 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9354638661219036e-06, - "loss": 0.4867, - "step": 13409 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.934284663041889e-06, - "loss": 0.473, - "step": 13410 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9331057808298483e-06, - "loss": 0.3607, - "step": 13411 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9319272195326777e-06, - "loss": 0.412, - "step": 13412 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9307489791972633e-06, - "loss": 0.3939, - "step": 13413 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9295710598704787e-06, - "loss": 0.4816, - "step": 13414 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.928393461599183e-06, - "loss": 0.4669, - "step": 13415 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9272161844302226e-06, - "loss": 0.4811, - "step": 13416 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9260392284104314e-06, - "loss": 0.3888, - "step": 13417 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9248625935866293e-06, - "loss": 0.4717, - "step": 13418 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.923686280005629e-06, - "loss": 0.4399, - "step": 13419 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9225102877142266e-06, - "loss": 0.4129, - "step": 13420 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9213346167591994e-06, - "loss": 0.5204, - "step": 13421 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9201592671873205e-06, - "loss": 0.442, - "step": 13422 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.918984239045346e-06, - "loss": 0.558, - "step": 13423 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.917809532380024e-06, - "loss": 0.4147, - "step": 13424 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.916635147238084e-06, - "loss": 0.482, - "step": 13425 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9154610836662457e-06, - "loss": 0.4788, - "step": 13426 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.914287341711215e-06, - "loss": 0.4171, - "step": 13427 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9131139214196857e-06, - "loss": 0.4316, - "step": 13428 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9119408228383385e-06, - "loss": 0.3765, - "step": 13429 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.91076804601384e-06, - "loss": 0.5096, - "step": 13430 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9095955909928478e-06, - "loss": 0.4445, - "step": 13431 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9084234578220007e-06, - "loss": 0.4613, - "step": 13432 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.907251646547932e-06, - "loss": 0.4806, - "step": 13433 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9060801572172527e-06, - "loss": 0.4697, - "step": 13434 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.904908989876575e-06, - "loss": 0.4448, - "step": 13435 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9037381445724845e-06, - "loss": 0.4709, - "step": 13436 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9025676213515587e-06, - "loss": 0.4326, - "step": 13437 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9013974202603657e-06, - "loss": 0.4744, - "step": 13438 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.9002275413454563e-06, - "loss": 0.3758, - "step": 13439 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8990579846533686e-06, - "loss": 0.494, - "step": 13440 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8978887502306343e-06, - "loss": 0.4322, - "step": 13441 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.896719838123765e-06, - "loss": 0.4676, - "step": 13442 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8955512483792659e-06, - "loss": 0.3853, - "step": 13443 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8943829810436188e-06, - "loss": 0.4641, - "step": 13444 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8932150361632995e-06, - "loss": 0.45, - "step": 13445 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8920474137847767e-06, - "loss": 0.5002, - "step": 13446 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8908801139544964e-06, - "loss": 0.4907, - "step": 13447 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8897131367188981e-06, - "loss": 0.4026, - "step": 13448 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8885464821244048e-06, - "loss": 0.4215, - "step": 13449 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8873801502174271e-06, - "loss": 0.4541, - "step": 13450 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8862141410443657e-06, - "loss": 0.4928, - "step": 13451 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8850484546516057e-06, - "loss": 0.4922, - "step": 13452 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8838830910855188e-06, - "loss": 0.4612, - "step": 13453 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8827180503924668e-06, - "loss": 0.4672, - "step": 13454 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8815533326187963e-06, - "loss": 0.4292, - "step": 13455 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8803889378108397e-06, - "loss": 0.4989, - "step": 13456 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8792248660149225e-06, - "loss": 0.4509, - "step": 13457 - }, - { - "epoch": 2.43, - "grad_norm": 0.0, - "learning_rate": 1.8780611172773556e-06, - "loss": 0.4376, - "step": 13458 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8768976916444281e-06, - "loss": 0.3926, - "step": 13459 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8757345891624257e-06, - "loss": 0.4009, - "step": 13460 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8745718098776167e-06, - "loss": 0.3843, - "step": 13461 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.873409353836263e-06, - "loss": 0.3374, - "step": 13462 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8722472210846064e-06, - "loss": 0.4853, - "step": 13463 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8710854116688804e-06, - "loss": 0.4293, - "step": 13464 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.869923925635304e-06, - "loss": 0.3921, - "step": 13465 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8687627630300764e-06, - "loss": 0.4335, - "step": 13466 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.867601923899398e-06, - "loss": 0.4775, - "step": 13467 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8664414082894467e-06, - "loss": 0.4194, - "step": 13468 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8652812162463907e-06, - "loss": 0.4284, - "step": 13469 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8641213478163821e-06, - "loss": 0.4715, - "step": 13470 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8629618030455643e-06, - "loss": 0.4697, - "step": 13471 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8618025819800633e-06, - "loss": 0.46, - "step": 13472 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8606436846660013e-06, - "loss": 0.4653, - "step": 13473 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8594851111494762e-06, - "loss": 0.4908, - "step": 13474 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.858326861476577e-06, - "loss": 0.4432, - "step": 13475 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8571689356933831e-06, - "loss": 0.4887, - "step": 13476 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8560113338459562e-06, - "loss": 0.4744, - "step": 13477 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8548540559803529e-06, - "loss": 0.4932, - "step": 13478 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8536971021426075e-06, - "loss": 0.4148, - "step": 13479 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8525404723787477e-06, - "loss": 0.4105, - "step": 13480 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8513841667347877e-06, - "loss": 0.4584, - "step": 13481 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8502281852567195e-06, - "loss": 0.4542, - "step": 13482 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8490725279905385e-06, - "loss": 0.3377, - "step": 13483 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.847917194982215e-06, - "loss": 0.46, - "step": 13484 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8467621862777109e-06, - "loss": 0.5053, - "step": 13485 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8456075019229747e-06, - "loss": 0.3868, - "step": 13486 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8444531419639411e-06, - "loss": 0.4053, - "step": 13487 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8432991064465322e-06, - "loss": 0.4089, - "step": 13488 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.842145395416659e-06, - "loss": 0.4076, - "step": 13489 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.840992008920217e-06, - "loss": 0.4865, - "step": 13490 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8398389470030896e-06, - "loss": 0.4752, - "step": 13491 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.838686209711148e-06, - "loss": 0.4255, - "step": 13492 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8375337970902508e-06, - "loss": 0.4421, - "step": 13493 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.836381709186239e-06, - "loss": 0.4587, - "step": 13494 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8352299460449497e-06, - "loss": 0.4436, - "step": 13495 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8340785077122037e-06, - "loss": 0.5062, - "step": 13496 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8329273942338011e-06, - "loss": 0.4592, - "step": 13497 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8317766056555376e-06, - "loss": 0.4344, - "step": 13498 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8306261420231908e-06, - "loss": 0.4349, - "step": 13499 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8294760033825344e-06, - "loss": 0.4769, - "step": 13500 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8283261897793192e-06, - "loss": 0.3834, - "step": 13501 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8271767012592867e-06, - "loss": 0.4123, - "step": 13502 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.826027537868168e-06, - "loss": 0.4519, - "step": 13503 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.824878699651672e-06, - "loss": 0.4576, - "step": 13504 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.823730186655509e-06, - "loss": 0.4163, - "step": 13505 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8225819989253658e-06, - "loss": 0.4718, - "step": 13506 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8214341365069187e-06, - "loss": 0.4842, - "step": 13507 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.820286599445833e-06, - "loss": 0.4673, - "step": 13508 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8191393877877583e-06, - "loss": 0.4105, - "step": 13509 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8179925015783306e-06, - "loss": 0.4811, - "step": 13510 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8168459408631845e-06, - "loss": 0.4855, - "step": 13511 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8156997056879211e-06, - "loss": 0.4213, - "step": 13512 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8145537960981451e-06, - "loss": 0.5493, - "step": 13513 - }, - { - "epoch": 2.44, - "grad_norm": 0.0, - "learning_rate": 1.8134082121394403e-06, - "loss": 0.4145, - "step": 13514 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8122629538573798e-06, - "loss": 0.4632, - "step": 13515 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8111180212975277e-06, - "loss": 0.5228, - "step": 13516 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.809973414505428e-06, - "loss": 0.4488, - "step": 13517 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8088291335266206e-06, - "loss": 0.4113, - "step": 13518 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8076851784066185e-06, - "loss": 0.4536, - "step": 13519 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.806541549190932e-06, - "loss": 0.4623, - "step": 13520 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8053982459250618e-06, - "loss": 0.3951, - "step": 13521 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8042552686544867e-06, - "loss": 0.4443, - "step": 13522 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8031126174246783e-06, - "loss": 0.5011, - "step": 13523 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8019702922810911e-06, - "loss": 0.4237, - "step": 13524 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.8008282932691701e-06, - "loss": 0.4397, - "step": 13525 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7996866204343454e-06, - "loss": 0.4606, - "step": 13526 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7985452738220343e-06, - "loss": 0.3532, - "step": 13527 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7974042534776425e-06, - "loss": 0.5037, - "step": 13528 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7962635594465616e-06, - "loss": 0.4506, - "step": 13529 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7951231917741707e-06, - "loss": 0.3948, - "step": 13530 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7939831505058336e-06, - "loss": 0.4323, - "step": 13531 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.792843435686903e-06, - "loss": 0.4391, - "step": 13532 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7917040473627223e-06, - "loss": 0.3942, - "step": 13533 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7905649855786177e-06, - "loss": 0.4183, - "step": 13534 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7894262503798999e-06, - "loss": 0.4474, - "step": 13535 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7882878418118721e-06, - "loss": 0.4148, - "step": 13536 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7871497599198174e-06, - "loss": 0.453, - "step": 13537 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7860120047490182e-06, - "loss": 0.4655, - "step": 13538 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7848745763447328e-06, - "loss": 0.3831, - "step": 13539 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7837374747522097e-06, - "loss": 0.4599, - "step": 13540 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.782600700016688e-06, - "loss": 0.4299, - "step": 13541 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7814642521833813e-06, - "loss": 0.4288, - "step": 13542 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7803281312975095e-06, - "loss": 0.3784, - "step": 13543 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7791923374042652e-06, - "loss": 0.4431, - "step": 13544 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7780568705488333e-06, - "loss": 0.4355, - "step": 13545 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7769217307763842e-06, - "loss": 0.4955, - "step": 13546 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7757869181320753e-06, - "loss": 0.4129, - "step": 13547 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7746524326610482e-06, - "loss": 0.4537, - "step": 13548 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7735182744084444e-06, - "loss": 0.4108, - "step": 13549 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7723844434193738e-06, - "loss": 0.371, - "step": 13550 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7712509397389444e-06, - "loss": 0.4391, - "step": 13551 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.770117763412249e-06, - "loss": 0.4777, - "step": 13552 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7689849144843652e-06, - "loss": 0.3457, - "step": 13553 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7678523930003645e-06, - "loss": 0.4811, - "step": 13554 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7667201990052974e-06, - "loss": 0.5056, - "step": 13555 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.765588332544209e-06, - "loss": 0.5001, - "step": 13556 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7644567936621194e-06, - "loss": 0.4076, - "step": 13557 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7633255824040453e-06, - "loss": 0.4818, - "step": 13558 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7621946988149919e-06, - "loss": 0.456, - "step": 13559 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7610641429399456e-06, - "loss": 0.4235, - "step": 13560 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7599339148238815e-06, - "loss": 0.4204, - "step": 13561 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7588040145117614e-06, - "loss": 0.409, - "step": 13562 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.757674442048537e-06, - "loss": 0.4666, - "step": 13563 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7565451974791426e-06, - "loss": 0.4069, - "step": 13564 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.755416280848502e-06, - "loss": 0.4904, - "step": 13565 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7542876922015262e-06, - "loss": 0.4008, - "step": 13566 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7531594315831114e-06, - "loss": 0.4669, - "step": 13567 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7520314990381416e-06, - "loss": 0.3887, - "step": 13568 - }, - { - "epoch": 2.45, - "grad_norm": 0.0, - "learning_rate": 1.7509038946114853e-06, - "loss": 0.5043, - "step": 13569 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.749776618348007e-06, - "loss": 0.4212, - "step": 13570 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7486496702925504e-06, - "loss": 0.4768, - "step": 13571 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.747523050489943e-06, - "loss": 0.4253, - "step": 13572 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7463967589850062e-06, - "loss": 0.4563, - "step": 13573 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7452707958225423e-06, - "loss": 0.4393, - "step": 13574 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7441451610473504e-06, - "loss": 0.4647, - "step": 13575 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7430198547042066e-06, - "loss": 0.4685, - "step": 13576 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7418948768378785e-06, - "loss": 0.4091, - "step": 13577 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7407702274931182e-06, - "loss": 0.4511, - "step": 13578 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7396459067146677e-06, - "loss": 0.4218, - "step": 13579 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7385219145472543e-06, - "loss": 0.4453, - "step": 13580 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7373982510355902e-06, - "loss": 0.4591, - "step": 13581 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7362749162243797e-06, - "loss": 0.4036, - "step": 13582 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7351519101583092e-06, - "loss": 0.4071, - "step": 13583 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7340292328820541e-06, - "loss": 0.3923, - "step": 13584 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7329068844402775e-06, - "loss": 0.4191, - "step": 13585 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7317848648776236e-06, - "loss": 0.3551, - "step": 13586 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.730663174238737e-06, - "loss": 0.4957, - "step": 13587 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.729541812568234e-06, - "loss": 0.4052, - "step": 13588 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7284207799107255e-06, - "loss": 0.3938, - "step": 13589 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7273000763108071e-06, - "loss": 0.4705, - "step": 13590 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.726179701813062e-06, - "loss": 0.3509, - "step": 13591 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7250596564620647e-06, - "loss": 0.4266, - "step": 13592 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7239399403023704e-06, - "loss": 0.3924, - "step": 13593 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7228205533785259e-06, - "loss": 0.4061, - "step": 13594 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7217014957350554e-06, - "loss": 0.386, - "step": 13595 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7205827674164788e-06, - "loss": 0.4355, - "step": 13596 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7194643684673063e-06, - "loss": 0.5065, - "step": 13597 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7183462989320265e-06, - "loss": 0.4076, - "step": 13598 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7172285588551173e-06, - "loss": 0.4809, - "step": 13599 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7161111482810466e-06, - "loss": 0.4, - "step": 13600 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7149940672542641e-06, - "loss": 0.4437, - "step": 13601 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7138773158192113e-06, - "loss": 0.4253, - "step": 13602 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7127608940203143e-06, - "loss": 0.4469, - "step": 13603 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7116448019019849e-06, - "loss": 0.5206, - "step": 13604 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7105290395086238e-06, - "loss": 0.4215, - "step": 13605 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.709413606884619e-06, - "loss": 0.4945, - "step": 13606 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7082985040743406e-06, - "loss": 0.4014, - "step": 13607 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7071837311221552e-06, - "loss": 0.392, - "step": 13608 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7060692880724095e-06, - "loss": 0.4772, - "step": 13609 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7049551749694326e-06, - "loss": 0.5008, - "step": 13610 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7038413918575503e-06, - "loss": 0.4516, - "step": 13611 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7027279387810657e-06, - "loss": 0.3935, - "step": 13612 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7016148157842816e-06, - "loss": 0.4557, - "step": 13613 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.7005020229114744e-06, - "loss": 0.4338, - "step": 13614 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6993895602069156e-06, - "loss": 0.4321, - "step": 13615 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6982774277148605e-06, - "loss": 0.339, - "step": 13616 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.69716562547955e-06, - "loss": 0.4092, - "step": 13617 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6960541535452145e-06, - "loss": 0.456, - "step": 13618 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6949430119560706e-06, - "loss": 0.3885, - "step": 13619 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.69383220075632e-06, - "loss": 0.4327, - "step": 13620 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6927217199901546e-06, - "loss": 0.3617, - "step": 13621 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.691611569701751e-06, - "loss": 0.4473, - "step": 13622 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6905017499352716e-06, - "loss": 0.4218, - "step": 13623 - }, - { - "epoch": 2.46, - "grad_norm": 0.0, - "learning_rate": 1.6893922607348667e-06, - "loss": 0.4705, - "step": 13624 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6882831021446755e-06, - "loss": 0.3933, - "step": 13625 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6871742742088216e-06, - "loss": 0.3719, - "step": 13626 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.686065776971415e-06, - "loss": 0.4408, - "step": 13627 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6849576104765542e-06, - "loss": 0.5165, - "step": 13628 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6838497747683214e-06, - "loss": 0.4905, - "step": 13629 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6827422698907946e-06, - "loss": 0.4598, - "step": 13630 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.681635095888028e-06, - "loss": 0.5641, - "step": 13631 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.680528252804069e-06, - "loss": 0.4336, - "step": 13632 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6794217406829473e-06, - "loss": 0.3517, - "step": 13633 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6783155595686783e-06, - "loss": 0.481, - "step": 13634 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6772097095052763e-06, - "loss": 0.5142, - "step": 13635 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6761041905367293e-06, - "loss": 0.3743, - "step": 13636 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6749990027070174e-06, - "loss": 0.568, - "step": 13637 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6738941460601054e-06, - "loss": 0.4971, - "step": 13638 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6727896206399496e-06, - "loss": 0.4044, - "step": 13639 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.671685426490487e-06, - "loss": 0.4202, - "step": 13640 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6705815636556455e-06, - "loss": 0.4636, - "step": 13641 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.669478032179338e-06, - "loss": 0.3558, - "step": 13642 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.668374832105466e-06, - "loss": 0.4827, - "step": 13643 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6672719634779156e-06, - "loss": 0.4034, - "step": 13644 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6661694263405592e-06, - "loss": 0.4499, - "step": 13645 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.665067220737262e-06, - "loss": 0.4172, - "step": 13646 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.663965346711871e-06, - "loss": 0.5658, - "step": 13647 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.662863804308218e-06, - "loss": 0.4121, - "step": 13648 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6617625935701242e-06, - "loss": 0.4042, - "step": 13649 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6606617145413962e-06, - "loss": 0.4323, - "step": 13650 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6595611672658341e-06, - "loss": 0.4534, - "step": 13651 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6584609517872163e-06, - "loss": 0.4751, - "step": 13652 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6573610681493114e-06, - "loss": 0.3873, - "step": 13653 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6562615163958751e-06, - "loss": 0.4477, - "step": 13654 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6551622965706493e-06, - "loss": 0.5086, - "step": 13655 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6540634087173634e-06, - "loss": 0.3972, - "step": 13656 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6529648528797327e-06, - "loss": 0.3285, - "step": 13657 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6518666291014584e-06, - "loss": 0.4353, - "step": 13658 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6507687374262316e-06, - "loss": 0.4946, - "step": 13659 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.649671177897727e-06, - "loss": 0.4749, - "step": 13660 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6485739505596054e-06, - "loss": 0.4665, - "step": 13661 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6474770554555241e-06, - "loss": 0.454, - "step": 13662 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6463804926291117e-06, - "loss": 0.41, - "step": 13663 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6452842621239927e-06, - "loss": 0.4651, - "step": 13664 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6441883639837797e-06, - "loss": 0.4203, - "step": 13665 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.643092798252065e-06, - "loss": 0.4659, - "step": 13666 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6419975649724374e-06, - "loss": 0.4095, - "step": 13667 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.640902664188464e-06, - "loss": 0.444, - "step": 13668 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6398080959437035e-06, - "loss": 0.4608, - "step": 13669 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6387138602816998e-06, - "loss": 0.4589, - "step": 13670 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6376199572459795e-06, - "loss": 0.4763, - "step": 13671 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.636526386880064e-06, - "loss": 0.3493, - "step": 13672 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6354331492274566e-06, - "loss": 0.3887, - "step": 13673 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.634340244331648e-06, - "loss": 0.4023, - "step": 13674 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6332476722361145e-06, - "loss": 0.368, - "step": 13675 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6321554329843236e-06, - "loss": 0.4232, - "step": 13676 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6310635266197227e-06, - "loss": 0.4402, - "step": 13677 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6299719531857527e-06, - "loss": 0.4851, - "step": 13678 - }, - { - "epoch": 2.47, - "grad_norm": 0.0, - "learning_rate": 1.6288807127258366e-06, - "loss": 0.486, - "step": 13679 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6277898052833862e-06, - "loss": 0.3648, - "step": 13680 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6266992309017993e-06, - "loss": 0.4017, - "step": 13681 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.625608989624462e-06, - "loss": 0.3828, - "step": 13682 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6245190814947432e-06, - "loss": 0.4133, - "step": 13683 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6234295065560046e-06, - "loss": 0.3683, - "step": 13684 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6223402648515928e-06, - "loss": 0.4704, - "step": 13685 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6212513564248355e-06, - "loss": 0.4158, - "step": 13686 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6201627813190523e-06, - "loss": 0.4224, - "step": 13687 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6190745395775464e-06, - "loss": 0.3738, - "step": 13688 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6179866312436144e-06, - "loss": 0.4134, - "step": 13689 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6168990563605346e-06, - "loss": 0.4618, - "step": 13690 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6158118149715706e-06, - "loss": 0.4488, - "step": 13691 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6147249071199767e-06, - "loss": 0.5266, - "step": 13692 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.613638332848989e-06, - "loss": 0.5007, - "step": 13693 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6125520922018368e-06, - "loss": 0.4431, - "step": 13694 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6114661852217295e-06, - "loss": 0.4152, - "step": 13695 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6103806119518682e-06, - "loss": 0.4197, - "step": 13696 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6092953724354388e-06, - "loss": 0.4701, - "step": 13697 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6082104667156128e-06, - "loss": 0.4155, - "step": 13698 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6071258948355484e-06, - "loss": 0.4323, - "step": 13699 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6060416568383986e-06, - "loss": 0.4573, - "step": 13700 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6049577527672888e-06, - "loss": 0.4381, - "step": 13701 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6038741826653414e-06, - "loss": 0.3981, - "step": 13702 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.602790946575662e-06, - "loss": 0.5186, - "step": 13703 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6017080445413413e-06, - "loss": 0.5072, - "step": 13704 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.6006254766054651e-06, - "loss": 0.3969, - "step": 13705 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5995432428110957e-06, - "loss": 0.4801, - "step": 13706 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5984613432012862e-06, - "loss": 0.4408, - "step": 13707 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5973797778190802e-06, - "loss": 0.3188, - "step": 13708 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5962985467074964e-06, - "loss": 0.535, - "step": 13709 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5952176499095552e-06, - "loss": 0.4463, - "step": 13710 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5941370874682539e-06, - "loss": 0.5037, - "step": 13711 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5930568594265795e-06, - "loss": 0.4764, - "step": 13712 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5919769658275052e-06, - "loss": 0.428, - "step": 13713 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5908974067139903e-06, - "loss": 0.3517, - "step": 13714 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5898181821289827e-06, - "loss": 0.4546, - "step": 13715 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5887392921154143e-06, - "loss": 0.4893, - "step": 13716 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5876607367162078e-06, - "loss": 0.4692, - "step": 13717 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5865825159742676e-06, - "loss": 0.5118, - "step": 13718 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5855046299324872e-06, - "loss": 0.4577, - "step": 13719 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.584427078633748e-06, - "loss": 0.4386, - "step": 13720 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5833498621209141e-06, - "loss": 0.436, - "step": 13721 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5822729804368432e-06, - "loss": 0.384, - "step": 13722 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5811964336243769e-06, - "loss": 0.4844, - "step": 13723 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5801202217263345e-06, - "loss": 0.4568, - "step": 13724 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5790443447855354e-06, - "loss": 0.4651, - "step": 13725 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5779688028447748e-06, - "loss": 0.3814, - "step": 13726 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5768935959468457e-06, - "loss": 0.3885, - "step": 13727 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5758187241345192e-06, - "loss": 0.3677, - "step": 13728 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5747441874505542e-06, - "loss": 0.4692, - "step": 13729 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5736699859377025e-06, - "loss": 0.5118, - "step": 13730 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5725961196386885e-06, - "loss": 0.4917, - "step": 13731 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5715225885962393e-06, - "loss": 0.4412, - "step": 13732 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5704493928530618e-06, - "loss": 0.4376, - "step": 13733 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5693765324518474e-06, - "loss": 0.4193, - "step": 13734 - }, - { - "epoch": 2.48, - "grad_norm": 0.0, - "learning_rate": 1.5683040074352762e-06, - "loss": 0.365, - "step": 13735 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5672318178460166e-06, - "loss": 0.4548, - "step": 13736 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.56615996372672e-06, - "loss": 0.4695, - "step": 13737 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5650884451200322e-06, - "loss": 0.4828, - "step": 13738 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5640172620685723e-06, - "loss": 0.4648, - "step": 13739 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5629464146149587e-06, - "loss": 0.3473, - "step": 13740 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5618759028017894e-06, - "loss": 0.4437, - "step": 13741 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5608057266716503e-06, - "loss": 0.4367, - "step": 13742 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5597358862671186e-06, - "loss": 0.3667, - "step": 13743 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5586663816307523e-06, - "loss": 0.4499, - "step": 13744 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5575972128050976e-06, - "loss": 0.3884, - "step": 13745 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5565283798326914e-06, - "loss": 0.4421, - "step": 13746 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5554598827560463e-06, - "loss": 0.5135, - "step": 13747 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.554391721617675e-06, - "loss": 0.4752, - "step": 13748 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5533238964600694e-06, - "loss": 0.4343, - "step": 13749 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5522564073257097e-06, - "loss": 0.4522, - "step": 13750 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5511892542570606e-06, - "loss": 0.4835, - "step": 13751 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5501224372965773e-06, - "loss": 0.4663, - "step": 13752 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5490559564866992e-06, - "loss": 0.45, - "step": 13753 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5479898118698523e-06, - "loss": 0.468, - "step": 13754 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5469240034884493e-06, - "loss": 0.3818, - "step": 13755 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5458585313848906e-06, - "loss": 0.421, - "step": 13756 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5447933956015627e-06, - "loss": 0.3919, - "step": 13757 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5437285961808357e-06, - "loss": 0.478, - "step": 13758 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5426641331650739e-06, - "loss": 0.4603, - "step": 13759 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5416000065966208e-06, - "loss": 0.4778, - "step": 13760 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5405362165178118e-06, - "loss": 0.4203, - "step": 13761 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5394727629709616e-06, - "loss": 0.4811, - "step": 13762 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5384096459983767e-06, - "loss": 0.4235, - "step": 13763 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5373468656423529e-06, - "loss": 0.4172, - "step": 13764 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5362844219451688e-06, - "loss": 0.4484, - "step": 13765 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5352223149490896e-06, - "loss": 0.4137, - "step": 13766 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5341605446963681e-06, - "loss": 0.5008, - "step": 13767 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5330991112292415e-06, - "loss": 0.4367, - "step": 13768 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5320380145899383e-06, - "loss": 0.437, - "step": 13769 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5309772548206692e-06, - "loss": 0.4153, - "step": 13770 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5299168319636327e-06, - "loss": 0.4124, - "step": 13771 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5288567460610138e-06, - "loss": 0.4672, - "step": 13772 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5277969971549867e-06, - "loss": 0.4216, - "step": 13773 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5267375852877087e-06, - "loss": 0.3571, - "step": 13774 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5256785105013216e-06, - "loss": 0.3918, - "step": 13775 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5246197728379664e-06, - "loss": 0.4584, - "step": 13776 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5235613723397536e-06, - "loss": 0.4866, - "step": 13777 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5225033090487896e-06, - "loss": 0.4655, - "step": 13778 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5214455830071672e-06, - "loss": 0.3636, - "step": 13779 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.520388194256962e-06, - "loss": 0.4794, - "step": 13780 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5193311428402424e-06, - "loss": 0.4261, - "step": 13781 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.518274428799058e-06, - "loss": 0.427, - "step": 13782 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5172180521754499e-06, - "loss": 0.4423, - "step": 13783 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5161620130114374e-06, - "loss": 0.4379, - "step": 13784 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5151063113490305e-06, - "loss": 0.4864, - "step": 13785 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5140509472302323e-06, - "loss": 0.3875, - "step": 13786 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5129959206970247e-06, - "loss": 0.3865, - "step": 13787 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5119412317913795e-06, - "loss": 0.392, - "step": 13788 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5108868805552513e-06, - "loss": 0.4385, - "step": 13789 - }, - { - "epoch": 2.49, - "grad_norm": 0.0, - "learning_rate": 1.5098328670305872e-06, - "loss": 0.4435, - "step": 13790 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5087791912593152e-06, - "loss": 0.519, - "step": 13791 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5077258532833527e-06, - "loss": 0.4297, - "step": 13792 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.506672853144605e-06, - "loss": 0.4144, - "step": 13793 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.505620190884961e-06, - "loss": 0.4267, - "step": 13794 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5045678665462971e-06, - "loss": 0.4146, - "step": 13795 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5035158801704742e-06, - "loss": 0.4219, - "step": 13796 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5024642317993477e-06, - "loss": 0.4464, - "step": 13797 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.5014129214747509e-06, - "loss": 0.4385, - "step": 13798 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.500361949238509e-06, - "loss": 0.4201, - "step": 13799 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.499311315132428e-06, - "loss": 0.4254, - "step": 13800 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4982610191983028e-06, - "loss": 0.4504, - "step": 13801 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4972110614779212e-06, - "loss": 0.4599, - "step": 13802 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4961614420130488e-06, - "loss": 0.4423, - "step": 13803 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4951121608454422e-06, - "loss": 0.4503, - "step": 13804 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4940632180168435e-06, - "loss": 0.4145, - "step": 13805 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4930146135689826e-06, - "loss": 0.4194, - "step": 13806 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.491966347543572e-06, - "loss": 0.5116, - "step": 13807 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4909184199823168e-06, - "loss": 0.4675, - "step": 13808 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4898708309269028e-06, - "loss": 0.3286, - "step": 13809 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4888235804190054e-06, - "loss": 0.4051, - "step": 13810 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.487776668500287e-06, - "loss": 0.4374, - "step": 13811 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4867300952123953e-06, - "loss": 0.4319, - "step": 13812 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4856838605969614e-06, - "loss": 0.4523, - "step": 13813 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4846379646956144e-06, - "loss": 0.3993, - "step": 13814 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.483592407549954e-06, - "loss": 0.4222, - "step": 13815 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4825471892015775e-06, - "loss": 0.4415, - "step": 13816 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4815023096920654e-06, - "loss": 0.3926, - "step": 13817 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4804577690629818e-06, - "loss": 0.3925, - "step": 13818 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.479413567355885e-06, - "loss": 0.4327, - "step": 13819 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4783697046123124e-06, - "loss": 0.4738, - "step": 13820 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4773261808737949e-06, - "loss": 0.4488, - "step": 13821 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4762829961818392e-06, - "loss": 0.3971, - "step": 13822 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4752401505779456e-06, - "loss": 0.3616, - "step": 13823 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4741976441036054e-06, - "loss": 0.4672, - "step": 13824 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4731554768002875e-06, - "loss": 0.5278, - "step": 13825 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4721136487094524e-06, - "loss": 0.4442, - "step": 13826 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4710721598725453e-06, - "loss": 0.4261, - "step": 13827 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.470031010330999e-06, - "loss": 0.4989, - "step": 13828 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4689902001262313e-06, - "loss": 0.4119, - "step": 13829 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.467949729299648e-06, - "loss": 0.4235, - "step": 13830 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4669095978926407e-06, - "loss": 0.4991, - "step": 13831 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.465869805946587e-06, - "loss": 0.4178, - "step": 13832 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.464830353502853e-06, - "loss": 0.4435, - "step": 13833 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4637912406027866e-06, - "loss": 0.4709, - "step": 13834 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4627524672877302e-06, - "loss": 0.4148, - "step": 13835 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4617140335990087e-06, - "loss": 0.4686, - "step": 13836 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4606759395779269e-06, - "loss": 0.4553, - "step": 13837 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4596381852657849e-06, - "loss": 0.4872, - "step": 13838 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.458600770703864e-06, - "loss": 0.3742, - "step": 13839 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4575636959334383e-06, - "loss": 0.4059, - "step": 13840 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4565269609957644e-06, - "loss": 0.4809, - "step": 13841 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4554905659320827e-06, - "loss": 0.4683, - "step": 13842 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4544545107836238e-06, - "loss": 0.4587, - "step": 13843 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4534187955916047e-06, - "loss": 0.4017, - "step": 13844 - }, - { - "epoch": 2.5, - "grad_norm": 0.0, - "learning_rate": 1.4523834203972253e-06, - "loss": 0.4847, - "step": 13845 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4513483852416776e-06, - "loss": 0.5053, - "step": 13846 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4503136901661364e-06, - "loss": 0.4447, - "step": 13847 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4492793352117618e-06, - "loss": 0.4766, - "step": 13848 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4482453204197034e-06, - "loss": 0.4356, - "step": 13849 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4472116458310948e-06, - "loss": 0.3894, - "step": 13850 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4461783114870598e-06, - "loss": 0.4224, - "step": 13851 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4451453174287089e-06, - "loss": 0.4601, - "step": 13852 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4441126636971292e-06, - "loss": 0.4135, - "step": 13853 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4430803503334045e-06, - "loss": 0.4787, - "step": 13854 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.442048377378601e-06, - "loss": 0.4715, - "step": 13855 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4410167448737744e-06, - "loss": 0.4758, - "step": 13856 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4399854528599656e-06, - "loss": 0.4052, - "step": 13857 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.438954501378199e-06, - "loss": 0.4689, - "step": 13858 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4379238904694903e-06, - "loss": 0.4529, - "step": 13859 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.436893620174833e-06, - "loss": 0.4475, - "step": 13860 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4358636905352186e-06, - "loss": 0.5057, - "step": 13861 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.434834101591618e-06, - "loss": 0.4819, - "step": 13862 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.433804853384989e-06, - "loss": 0.4116, - "step": 13863 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4327759459562785e-06, - "loss": 0.4322, - "step": 13864 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4317473793464176e-06, - "loss": 0.3916, - "step": 13865 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4307191535963227e-06, - "loss": 0.4262, - "step": 13866 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.429691268746901e-06, - "loss": 0.4192, - "step": 13867 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4286637248390423e-06, - "loss": 0.4203, - "step": 13868 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4276365219136245e-06, - "loss": 0.4723, - "step": 13869 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4266096600115109e-06, - "loss": 0.4079, - "step": 13870 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4255831391735519e-06, - "loss": 0.4612, - "step": 13871 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4245569594405818e-06, - "loss": 0.4275, - "step": 13872 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4235311208534285e-06, - "loss": 0.3931, - "step": 13873 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4225056234529033e-06, - "loss": 0.4425, - "step": 13874 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4214804672797943e-06, - "loss": 0.4022, - "step": 13875 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4204556523748892e-06, - "loss": 0.4927, - "step": 13876 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4194311787789539e-06, - "loss": 0.453, - "step": 13877 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.418407046532747e-06, - "loss": 0.4602, - "step": 13878 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4173832556770096e-06, - "loss": 0.5027, - "step": 13879 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4163598062524685e-06, - "loss": 0.4475, - "step": 13880 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4153366982998395e-06, - "loss": 0.4421, - "step": 13881 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.414313931859822e-06, - "loss": 0.4052, - "step": 13882 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4132915069731057e-06, - "loss": 0.4154, - "step": 13883 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4122694236803624e-06, - "loss": 0.4778, - "step": 13884 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4112476820222531e-06, - "loss": 0.4479, - "step": 13885 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4102262820394242e-06, - "loss": 0.3965, - "step": 13886 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4092052237725084e-06, - "loss": 0.4971, - "step": 13887 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4081845072621247e-06, - "loss": 0.4572, - "step": 13888 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4071641325488839e-06, - "loss": 0.4632, - "step": 13889 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.406144099673371e-06, - "loss": 0.3921, - "step": 13890 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4051244086761696e-06, - "loss": 0.4709, - "step": 13891 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4041050595978423e-06, - "loss": 0.4441, - "step": 13892 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.403086052478938e-06, - "loss": 0.4332, - "step": 13893 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4020673873600011e-06, - "loss": 0.3908, - "step": 13894 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4010490642815532e-06, - "loss": 0.3781, - "step": 13895 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.4000310832841035e-06, - "loss": 0.3796, - "step": 13896 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.3990134444081527e-06, - "loss": 0.3736, - "step": 13897 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.3979961476941762e-06, - "loss": 0.4076, - "step": 13898 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.3969791931826516e-06, - "loss": 0.4522, - "step": 13899 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.395962580914032e-06, - "loss": 0.4186, - "step": 13900 - }, - { - "epoch": 2.51, - "grad_norm": 0.0, - "learning_rate": 1.3949463109287609e-06, - "loss": 0.4175, - "step": 13901 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3939303832672667e-06, - "loss": 0.3675, - "step": 13902 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3929147979699654e-06, - "loss": 0.469, - "step": 13903 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3918995550772574e-06, - "loss": 0.3828, - "step": 13904 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.390884654629533e-06, - "loss": 0.3979, - "step": 13905 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3898700966671642e-06, - "loss": 0.5318, - "step": 13906 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3888558812305131e-06, - "loss": 0.4151, - "step": 13907 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3878420083599265e-06, - "loss": 0.4137, - "step": 13908 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3868284780957376e-06, - "loss": 0.5103, - "step": 13909 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3858152904782663e-06, - "loss": 0.4413, - "step": 13910 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3848024455478204e-06, - "loss": 0.3984, - "step": 13911 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3837899433446954e-06, - "loss": 0.4041, - "step": 13912 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3827777839091638e-06, - "loss": 0.4739, - "step": 13913 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3817659672814942e-06, - "loss": 0.4231, - "step": 13914 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3807544935019369e-06, - "loss": 0.4351, - "step": 13915 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3797433626107326e-06, - "loss": 0.4935, - "step": 13916 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.378732574648105e-06, - "loss": 0.5198, - "step": 13917 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3777221296542642e-06, - "loss": 0.4403, - "step": 13918 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.376712027669409e-06, - "loss": 0.4943, - "step": 13919 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3757022687337207e-06, - "loss": 0.4547, - "step": 13920 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3746928528873715e-06, - "loss": 0.4356, - "step": 13921 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.373683780170516e-06, - "loss": 0.3619, - "step": 13922 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3726750506232967e-06, - "loss": 0.5129, - "step": 13923 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3716666642858445e-06, - "loss": 0.4678, - "step": 13924 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.370658621198273e-06, - "loss": 0.4906, - "step": 13925 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3696509214006815e-06, - "loss": 0.4502, - "step": 13926 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3686435649331664e-06, - "loss": 0.4539, - "step": 13927 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.367636551835795e-06, - "loss": 0.406, - "step": 13928 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.366629882148628e-06, - "loss": 0.4605, - "step": 13929 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3656235559117148e-06, - "loss": 0.4612, - "step": 13930 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3646175731650856e-06, - "loss": 0.4548, - "step": 13931 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3636119339487651e-06, - "loss": 0.453, - "step": 13932 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.362606638302757e-06, - "loss": 0.418, - "step": 13933 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3616016862670534e-06, - "loss": 0.4899, - "step": 13934 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3605970778816358e-06, - "loss": 0.4767, - "step": 13935 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3595928131864611e-06, - "loss": 0.4141, - "step": 13936 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3585888922214885e-06, - "loss": 0.4541, - "step": 13937 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3575853150266537e-06, - "loss": 0.4373, - "step": 13938 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3565820816418807e-06, - "loss": 0.4659, - "step": 13939 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3555791921070782e-06, - "loss": 0.4551, - "step": 13940 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3545766464621457e-06, - "loss": 0.5046, - "step": 13941 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3535744447469635e-06, - "loss": 0.4428, - "step": 13942 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3525725870014029e-06, - "loss": 0.4169, - "step": 13943 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3515710732653175e-06, - "loss": 0.4709, - "step": 13944 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3505699035785513e-06, - "loss": 0.4865, - "step": 13945 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3495690779809312e-06, - "loss": 0.4576, - "step": 13946 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.348568596512273e-06, - "loss": 0.5115, - "step": 13947 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3475684592123739e-06, - "loss": 0.4371, - "step": 13948 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3465686661210264e-06, - "loss": 0.4042, - "step": 13949 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3455692172780033e-06, - "loss": 0.4165, - "step": 13950 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3445701127230604e-06, - "loss": 0.496, - "step": 13951 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.343571352495946e-06, - "loss": 0.464, - "step": 13952 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3425729366363904e-06, - "loss": 0.4906, - "step": 13953 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3415748651841176e-06, - "loss": 0.4456, - "step": 13954 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.340577138178828e-06, - "loss": 0.323, - "step": 13955 - }, - { - "epoch": 2.52, - "grad_norm": 0.0, - "learning_rate": 1.3395797556602152e-06, - "loss": 0.3569, - "step": 13956 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3385827176679544e-06, - "loss": 0.4487, - "step": 13957 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3375860242417127e-06, - "loss": 0.4029, - "step": 13958 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3365896754211371e-06, - "loss": 0.4157, - "step": 13959 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3355936712458661e-06, - "loss": 0.4571, - "step": 13960 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3345980117555212e-06, - "loss": 0.4612, - "step": 13961 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3336026969897132e-06, - "loss": 0.3493, - "step": 13962 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3326077269880356e-06, - "loss": 0.566, - "step": 13963 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.331613101790068e-06, - "loss": 0.4947, - "step": 13964 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3306188214353866e-06, - "loss": 0.477, - "step": 13965 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3296248859635374e-06, - "loss": 0.3812, - "step": 13966 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3286312954140623e-06, - "loss": 0.4469, - "step": 13967 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3276380498264908e-06, - "loss": 0.4588, - "step": 13968 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.326645149240331e-06, - "loss": 0.4573, - "step": 13969 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.325652593695087e-06, - "loss": 0.3792, - "step": 13970 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.324660383230244e-06, - "loss": 0.4965, - "step": 13971 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3236685178852726e-06, - "loss": 0.4389, - "step": 13972 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3226769976996322e-06, - "loss": 0.5386, - "step": 13973 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3216858227127628e-06, - "loss": 0.3573, - "step": 13974 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3206949929640988e-06, - "loss": 0.4201, - "step": 13975 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3197045084930582e-06, - "loss": 0.4362, - "step": 13976 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3187143693390413e-06, - "loss": 0.419, - "step": 13977 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3177245755414391e-06, - "loss": 0.4011, - "step": 13978 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3167351271396267e-06, - "loss": 0.4409, - "step": 13979 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3157460241729659e-06, - "loss": 0.4249, - "step": 13980 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3147572666808052e-06, - "loss": 0.4066, - "step": 13981 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3137688547024797e-06, - "loss": 0.3861, - "step": 13982 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3127807882773092e-06, - "loss": 0.4203, - "step": 13983 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3117930674446e-06, - "loss": 0.4884, - "step": 13984 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3108056922436451e-06, - "loss": 0.4082, - "step": 13985 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3098186627137276e-06, - "loss": 0.4299, - "step": 13986 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3088319788941106e-06, - "loss": 0.4449, - "step": 13987 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.307845640824048e-06, - "loss": 0.5047, - "step": 13988 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3068596485427744e-06, - "loss": 0.3827, - "step": 13989 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3058740020895144e-06, - "loss": 0.38, - "step": 13990 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3048887015034827e-06, - "loss": 0.4608, - "step": 13991 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3039037468238735e-06, - "loss": 0.4281, - "step": 13992 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3029191380898698e-06, - "loss": 0.4824, - "step": 13993 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3019348753406425e-06, - "loss": 0.4619, - "step": 13994 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.3009509586153469e-06, - "loss": 0.4592, - "step": 13995 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2999673879531238e-06, - "loss": 0.4444, - "step": 13996 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.298984163393102e-06, - "loss": 0.3998, - "step": 13997 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.298001284974396e-06, - "loss": 0.4527, - "step": 13998 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2970187527361067e-06, - "loss": 0.4708, - "step": 13999 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2960365667173203e-06, - "loss": 0.3976, - "step": 14000 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2950547269571089e-06, - "loss": 0.4347, - "step": 14001 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2940732334945315e-06, - "loss": 0.4274, - "step": 14002 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2930920863686402e-06, - "loss": 0.4046, - "step": 14003 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2921112856184582e-06, - "loss": 0.4011, - "step": 14004 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2911308312830061e-06, - "loss": 0.4639, - "step": 14005 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.29015072340129e-06, - "loss": 0.4136, - "step": 14006 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2891709620122962e-06, - "loss": 0.4346, - "step": 14007 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2881915471550065e-06, - "loss": 0.4614, - "step": 14008 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2872124788683804e-06, - "loss": 0.3723, - "step": 14009 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2862337571913685e-06, - "loss": 0.4118, - "step": 14010 - }, - { - "epoch": 2.53, - "grad_norm": 0.0, - "learning_rate": 1.2852553821629066e-06, - "loss": 0.3772, - "step": 14011 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.284277353821911e-06, - "loss": 0.413, - "step": 14012 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2832996722072954e-06, - "loss": 0.4242, - "step": 14013 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.282322337357952e-06, - "loss": 0.4153, - "step": 14014 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2813453493127593e-06, - "loss": 0.4621, - "step": 14015 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2803687081105842e-06, - "loss": 0.4641, - "step": 14016 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2793924137902801e-06, - "loss": 0.4186, - "step": 14017 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.278416466390685e-06, - "loss": 0.4646, - "step": 14018 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2774408659506233e-06, - "loss": 0.4192, - "step": 14019 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.276465612508907e-06, - "loss": 0.4239, - "step": 14020 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2754907061043331e-06, - "loss": 0.4475, - "step": 14021 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.274516146775684e-06, - "loss": 0.4185, - "step": 14022 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2735419345617284e-06, - "loss": 0.4088, - "step": 14023 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2725680695012267e-06, - "loss": 0.4166, - "step": 14024 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2715945516329164e-06, - "loss": 0.388, - "step": 14025 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2706213809955314e-06, - "loss": 0.3932, - "step": 14026 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2696485576277795e-06, - "loss": 0.4359, - "step": 14027 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2686760815683608e-06, - "loss": 0.4182, - "step": 14028 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2677039528559675e-06, - "loss": 0.3675, - "step": 14029 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2667321715292713e-06, - "loss": 0.3699, - "step": 14030 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.26576073762693e-06, - "loss": 0.3375, - "step": 14031 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.264789651187588e-06, - "loss": 0.4199, - "step": 14032 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.263818912249879e-06, - "loss": 0.4615, - "step": 14033 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2628485208524188e-06, - "loss": 0.4051, - "step": 14034 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2618784770338132e-06, - "loss": 0.3795, - "step": 14035 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2609087808326503e-06, - "loss": 0.3918, - "step": 14036 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.259939432287507e-06, - "loss": 0.4345, - "step": 14037 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2589704314369455e-06, - "loss": 0.5189, - "step": 14038 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2580017783195141e-06, - "loss": 0.4474, - "step": 14039 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2570334729737466e-06, - "loss": 0.4358, - "step": 14040 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2560655154381685e-06, - "loss": 0.438, - "step": 14041 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2550979057512824e-06, - "loss": 0.4393, - "step": 14042 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.254130643951581e-06, - "loss": 0.4636, - "step": 14043 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2531637300775468e-06, - "loss": 0.409, - "step": 14044 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.25219716416764e-06, - "loss": 0.4492, - "step": 14045 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2512309462603178e-06, - "loss": 0.4763, - "step": 14046 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2502650763940171e-06, - "loss": 0.4584, - "step": 14047 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2492995546071629e-06, - "loss": 0.4267, - "step": 14048 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2483343809381599e-06, - "loss": 0.4429, - "step": 14049 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.247369555425406e-06, - "loss": 0.4035, - "step": 14050 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2464050781072878e-06, - "loss": 0.4463, - "step": 14051 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.245440949022172e-06, - "loss": 0.4193, - "step": 14052 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.244477168208411e-06, - "loss": 0.5223, - "step": 14053 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2435137357043492e-06, - "loss": 0.4386, - "step": 14054 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2425506515483098e-06, - "loss": 0.5057, - "step": 14055 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2415879157786092e-06, - "loss": 0.4813, - "step": 14056 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2406255284335444e-06, - "loss": 0.4885, - "step": 14057 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2396634895514025e-06, - "loss": 0.408, - "step": 14058 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.238701799170453e-06, - "loss": 0.3952, - "step": 14059 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.2377404573289564e-06, - "loss": 0.4503, - "step": 14060 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.236779464065152e-06, - "loss": 0.4212, - "step": 14061 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.235818819417275e-06, - "loss": 0.4077, - "step": 14062 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.23485852342354e-06, - "loss": 0.4298, - "step": 14063 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.233898576122149e-06, - "loss": 0.4651, - "step": 14064 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.232938977551289e-06, - "loss": 0.4528, - "step": 14065 - }, - { - "epoch": 2.54, - "grad_norm": 0.0, - "learning_rate": 1.231979727749133e-06, - "loss": 0.4567, - "step": 14066 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2310208267538448e-06, - "loss": 0.4904, - "step": 14067 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2300622746035718e-06, - "loss": 0.4233, - "step": 14068 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2291040713364443e-06, - "loss": 0.3937, - "step": 14069 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2281462169905822e-06, - "loss": 0.3996, - "step": 14070 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2271887116040915e-06, - "loss": 0.4555, - "step": 14071 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2262315552150617e-06, - "loss": 0.3832, - "step": 14072 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2252747478615711e-06, - "loss": 0.4885, - "step": 14073 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.224318289581683e-06, - "loss": 0.4114, - "step": 14074 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2233621804134466e-06, - "loss": 0.368, - "step": 14075 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2224064203948981e-06, - "loss": 0.4273, - "step": 14076 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2214510095640576e-06, - "loss": 0.3881, - "step": 14077 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2204959479589362e-06, - "loss": 0.4467, - "step": 14078 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2195412356175274e-06, - "loss": 0.4452, - "step": 14079 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2185868725778083e-06, - "loss": 0.493, - "step": 14080 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2176328588777476e-06, - "loss": 0.4516, - "step": 14081 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2166791945552937e-06, - "loss": 0.3309, - "step": 14082 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2157258796483906e-06, - "loss": 0.4613, - "step": 14083 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2147729141949605e-06, - "loss": 0.3868, - "step": 14084 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.213820298232914e-06, - "loss": 0.4568, - "step": 14085 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2128680318001495e-06, - "loss": 0.4155, - "step": 14086 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2119161149345427e-06, - "loss": 0.3629, - "step": 14087 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2109645476739707e-06, - "loss": 0.4973, - "step": 14088 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2100133300562855e-06, - "loss": 0.4469, - "step": 14089 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.209062462119328e-06, - "loss": 0.5083, - "step": 14090 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.208111943900926e-06, - "loss": 0.5353, - "step": 14091 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.207161775438891e-06, - "loss": 0.4637, - "step": 14092 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.206211956771024e-06, - "loss": 0.4374, - "step": 14093 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2052624879351105e-06, - "loss": 0.4515, - "step": 14094 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2043133689689213e-06, - "loss": 0.4988, - "step": 14095 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2033645999102127e-06, - "loss": 0.4127, - "step": 14096 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2024161807967316e-06, - "loss": 0.4675, - "step": 14097 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.2014681116662053e-06, - "loss": 0.4667, - "step": 14098 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.200520392556348e-06, - "loss": 0.5305, - "step": 14099 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1995730235048664e-06, - "loss": 0.4071, - "step": 14100 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.198626004549448e-06, - "loss": 0.4574, - "step": 14101 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1976793357277617e-06, - "loss": 0.3979, - "step": 14102 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1967330170774717e-06, - "loss": 0.4037, - "step": 14103 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1957870486362199e-06, - "loss": 0.4662, - "step": 14104 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1948414304416444e-06, - "loss": 0.4419, - "step": 14105 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1938961625313617e-06, - "loss": 0.3992, - "step": 14106 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1929512449429736e-06, - "loss": 0.4766, - "step": 14107 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1920066777140737e-06, - "loss": 0.4473, - "step": 14108 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1910624608822364e-06, - "loss": 0.3562, - "step": 14109 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1901185944850258e-06, - "loss": 0.4153, - "step": 14110 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1891750785599887e-06, - "loss": 0.4259, - "step": 14111 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1882319131446617e-06, - "loss": 0.3538, - "step": 14112 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.187289098276564e-06, - "loss": 0.3848, - "step": 14113 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1863466339932027e-06, - "loss": 0.3992, - "step": 14114 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1854045203320697e-06, - "loss": 0.4643, - "step": 14115 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1844627573306467e-06, - "loss": 0.5238, - "step": 14116 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1835213450263982e-06, - "loss": 0.4558, - "step": 14117 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1825802834567734e-06, - "loss": 0.37, - "step": 14118 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.18163957265921e-06, - "loss": 0.4584, - "step": 14119 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.180699212671128e-06, - "loss": 0.4285, - "step": 14120 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1797592035299422e-06, - "loss": 0.4132, - "step": 14121 - }, - { - "epoch": 2.55, - "grad_norm": 0.0, - "learning_rate": 1.1788195452730456e-06, - "loss": 0.4822, - "step": 14122 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1778802379378185e-06, - "loss": 0.4456, - "step": 14123 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.176941281561631e-06, - "loss": 0.453, - "step": 14124 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.17600267618183e-06, - "loss": 0.469, - "step": 14125 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1750644218357598e-06, - "loss": 0.4174, - "step": 14126 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1741265185607464e-06, - "loss": 0.4649, - "step": 14127 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1731889663940986e-06, - "loss": 0.3714, - "step": 14128 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1722517653731157e-06, - "loss": 0.5071, - "step": 14129 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.17131491553508e-06, - "loss": 0.5256, - "step": 14130 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1703784169172616e-06, - "loss": 0.375, - "step": 14131 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1694422695569152e-06, - "loss": 0.4492, - "step": 14132 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1685064734912831e-06, - "loss": 0.4683, - "step": 14133 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1675710287575926e-06, - "loss": 0.4223, - "step": 14134 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1666359353930578e-06, - "loss": 0.4825, - "step": 14135 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1657011934348772e-06, - "loss": 0.4783, - "step": 14136 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1647668029202353e-06, - "loss": 0.5194, - "step": 14137 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1638327638863079e-06, - "loss": 0.4295, - "step": 14138 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1628990763702518e-06, - "loss": 0.3854, - "step": 14139 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1619657404092078e-06, - "loss": 0.4309, - "step": 14140 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1610327560403067e-06, - "loss": 0.5226, - "step": 14141 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1601001233006637e-06, - "loss": 0.3201, - "step": 14142 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1591678422273823e-06, - "loss": 0.4207, - "step": 14143 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1582359128575493e-06, - "loss": 0.4811, - "step": 14144 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1573043352282386e-06, - "loss": 0.4283, - "step": 14145 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1563731093765096e-06, - "loss": 0.3823, - "step": 14146 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.155442235339409e-06, - "loss": 0.3789, - "step": 14147 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1545117131539675e-06, - "loss": 0.4079, - "step": 14148 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1535815428572029e-06, - "loss": 0.4757, - "step": 14149 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.152651724486119e-06, - "loss": 0.4948, - "step": 14150 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1517222580777066e-06, - "loss": 0.5033, - "step": 14151 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.15079314366894e-06, - "loss": 0.4038, - "step": 14152 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1498643812967803e-06, - "loss": 0.362, - "step": 14153 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1489359709981806e-06, - "loss": 0.3997, - "step": 14154 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1480079128100686e-06, - "loss": 0.4546, - "step": 14155 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1470802067693653e-06, - "loss": 0.4125, - "step": 14156 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1461528529129785e-06, - "loss": 0.4779, - "step": 14157 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1452258512777958e-06, - "loss": 0.3591, - "step": 14158 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1442992019007005e-06, - "loss": 0.434, - "step": 14159 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1433729048185537e-06, - "loss": 0.4292, - "step": 14160 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1424469600682043e-06, - "loss": 0.4933, - "step": 14161 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.141521367686491e-06, - "loss": 0.3946, - "step": 14162 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1405961277102295e-06, - "loss": 0.3113, - "step": 14163 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1396712401762321e-06, - "loss": 0.5357, - "step": 14164 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1387467051212919e-06, - "loss": 0.3993, - "step": 14165 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.137822522582188e-06, - "loss": 0.4675, - "step": 14166 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.136898692595686e-06, - "loss": 0.4808, - "step": 14167 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.135975215198537e-06, - "loss": 0.4891, - "step": 14168 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1350520904274764e-06, - "loss": 0.4631, - "step": 14169 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1341293183192348e-06, - "loss": 0.4924, - "step": 14170 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.133206898910515e-06, - "loss": 0.4379, - "step": 14171 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1322848322380141e-06, - "loss": 0.4467, - "step": 14172 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1313631183384144e-06, - "loss": 0.4109, - "step": 14173 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1304417572483805e-06, - "loss": 0.4217, - "step": 14174 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.12952074900457e-06, - "loss": 0.3629, - "step": 14175 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1286000936436192e-06, - "loss": 0.4617, - "step": 14176 - }, - { - "epoch": 2.56, - "grad_norm": 0.0, - "learning_rate": 1.1276797912021576e-06, - "loss": 0.3663, - "step": 14177 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1267598417167903e-06, - "loss": 0.4852, - "step": 14178 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1258402452241146e-06, - "loss": 0.4164, - "step": 14179 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1249210017607193e-06, - "loss": 0.4712, - "step": 14180 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1240021113631695e-06, - "loss": 0.3917, - "step": 14181 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1230835740680212e-06, - "loss": 0.3991, - "step": 14182 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1221653899118157e-06, - "loss": 0.347, - "step": 14183 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.12124755893108e-06, - "loss": 0.4734, - "step": 14184 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1203300811623263e-06, - "loss": 0.5127, - "step": 14185 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1194129566420531e-06, - "loss": 0.499, - "step": 14186 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1184961854067467e-06, - "loss": 0.4145, - "step": 14187 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1175797674928756e-06, - "loss": 0.4186, - "step": 14188 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1166637029368988e-06, - "loss": 0.4716, - "step": 14189 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1157479917752578e-06, - "loss": 0.4278, - "step": 14190 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1148326340443793e-06, - "loss": 0.3722, - "step": 14191 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.113917629780683e-06, - "loss": 0.3379, - "step": 14192 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1130029790205654e-06, - "loss": 0.4913, - "step": 14193 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1120886818004117e-06, - "loss": 0.4276, - "step": 14194 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1111747381565974e-06, - "loss": 0.4704, - "step": 14195 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1102611481254776e-06, - "loss": 0.4591, - "step": 14196 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1093479117434003e-06, - "loss": 0.392, - "step": 14197 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1084350290466929e-06, - "loss": 0.4102, - "step": 14198 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1075225000716716e-06, - "loss": 0.4032, - "step": 14199 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1066103248546424e-06, - "loss": 0.4625, - "step": 14200 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1056985034318846e-06, - "loss": 0.4575, - "step": 14201 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1047870358396795e-06, - "loss": 0.5231, - "step": 14202 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1038759221142847e-06, - "loss": 0.4317, - "step": 14203 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1029651622919457e-06, - "loss": 0.4168, - "step": 14204 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1020547564088935e-06, - "loss": 0.3797, - "step": 14205 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1011447045013468e-06, - "loss": 0.3961, - "step": 14206 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.1002350066055079e-06, - "loss": 0.423, - "step": 14207 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0993256627575665e-06, - "loss": 0.3644, - "step": 14208 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0984166729936974e-06, - "loss": 0.4494, - "step": 14209 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0975080373500636e-06, - "loss": 0.5311, - "step": 14210 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0965997558628094e-06, - "loss": 0.4754, - "step": 14211 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0956918285680685e-06, - "loss": 0.4605, - "step": 14212 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0947842555019617e-06, - "loss": 0.4466, - "step": 14213 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0938770367005936e-06, - "loss": 0.4593, - "step": 14214 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0929701722000552e-06, - "loss": 0.4246, - "step": 14215 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0920636620364211e-06, - "loss": 0.4384, - "step": 14216 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0911575062457514e-06, - "loss": 0.4909, - "step": 14217 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0902517048641003e-06, - "loss": 0.3989, - "step": 14218 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0893462579274995e-06, - "loss": 0.5189, - "step": 14219 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0884411654719695e-06, - "loss": 0.495, - "step": 14220 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0875364275335165e-06, - "loss": 0.3746, - "step": 14221 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0866320441481315e-06, - "loss": 0.3674, - "step": 14222 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0857280153517935e-06, - "loss": 0.4877, - "step": 14223 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.084824341180467e-06, - "loss": 0.4449, - "step": 14224 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0839210216701e-06, - "loss": 0.4185, - "step": 14225 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0830180568566285e-06, - "loss": 0.4066, - "step": 14226 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0821154467759752e-06, - "loss": 0.3611, - "step": 14227 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0812131914640455e-06, - "loss": 0.4425, - "step": 14228 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0803112909567325e-06, - "loss": 0.4068, - "step": 14229 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.079409745289921e-06, - "loss": 0.4631, - "step": 14230 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.078508554499469e-06, - "loss": 0.4098, - "step": 14231 - }, - { - "epoch": 2.57, - "grad_norm": 0.0, - "learning_rate": 1.0776077186212308e-06, - "loss": 0.4719, - "step": 14232 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0767072376910416e-06, - "loss": 0.4261, - "step": 14233 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0758071117447232e-06, - "loss": 0.3581, - "step": 14234 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0749073408180877e-06, - "loss": 0.5074, - "step": 14235 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0740079249469282e-06, - "loss": 0.4436, - "step": 14236 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.073108864167024e-06, - "loss": 0.486, - "step": 14237 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0722101585141442e-06, - "loss": 0.3907, - "step": 14238 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0713118080240348e-06, - "loss": 0.4946, - "step": 14239 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0704138127324392e-06, - "loss": 0.4182, - "step": 14240 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0695161726750792e-06, - "loss": 0.4153, - "step": 14241 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0686188878876635e-06, - "loss": 0.4224, - "step": 14242 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0677219584058895e-06, - "loss": 0.4074, - "step": 14243 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0668253842654386e-06, - "loss": 0.4682, - "step": 14244 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0659291655019766e-06, - "loss": 0.4925, - "step": 14245 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0650333021511571e-06, - "loss": 0.4118, - "step": 14246 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0641377942486198e-06, - "loss": 0.4853, - "step": 14247 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.063242641829989e-06, - "loss": 0.3847, - "step": 14248 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0623478449308755e-06, - "loss": 0.4307, - "step": 14249 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.061453403586874e-06, - "loss": 0.3659, - "step": 14250 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0605593178335693e-06, - "loss": 0.4905, - "step": 14251 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.059665587706531e-06, - "loss": 0.4426, - "step": 14252 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0587722132413115e-06, - "loss": 0.454, - "step": 14253 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.057879194473449e-06, - "loss": 0.3687, - "step": 14254 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0569865314384697e-06, - "loss": 0.4001, - "step": 14255 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0560942241718875e-06, - "loss": 0.4032, - "step": 14256 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0552022727091994e-06, - "loss": 0.423, - "step": 14257 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.054310677085888e-06, - "loss": 0.4132, - "step": 14258 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0534194373374228e-06, - "loss": 0.4329, - "step": 14259 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.052528553499258e-06, - "loss": 0.4528, - "step": 14260 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.051638025606836e-06, - "loss": 0.4403, - "step": 14261 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0507478536955828e-06, - "loss": 0.4771, - "step": 14262 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0498580378009103e-06, - "loss": 0.4194, - "step": 14263 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0489685779582182e-06, - "loss": 0.4635, - "step": 14264 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0480794742028888e-06, - "loss": 0.4559, - "step": 14265 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.047190726570293e-06, - "loss": 0.4416, - "step": 14266 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.046302335095788e-06, - "loss": 0.4533, - "step": 14267 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0454142998147166e-06, - "loss": 0.4537, - "step": 14268 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0445266207624028e-06, - "loss": 0.4465, - "step": 14269 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0436392979741616e-06, - "loss": 0.4098, - "step": 14270 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0427523314852906e-06, - "loss": 0.4656, - "step": 14271 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0418657213310778e-06, - "loss": 0.421, - "step": 14272 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0409794675467932e-06, - "loss": 0.399, - "step": 14273 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0400935701676928e-06, - "loss": 0.4132, - "step": 14274 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0392080292290197e-06, - "loss": 0.3561, - "step": 14275 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0383228447660021e-06, - "loss": 0.3945, - "step": 14276 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0374380168138532e-06, - "loss": 0.458, - "step": 14277 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0365535454077736e-06, - "loss": 0.4864, - "step": 14278 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0356694305829496e-06, - "loss": 0.3878, - "step": 14279 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.034785672374552e-06, - "loss": 0.3993, - "step": 14280 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0339022708177393e-06, - "loss": 0.4503, - "step": 14281 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0330192259476535e-06, - "loss": 0.4064, - "step": 14282 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.032136537799424e-06, - "loss": 0.5706, - "step": 14283 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0312542064081666e-06, - "loss": 0.4562, - "step": 14284 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0303722318089816e-06, - "loss": 0.3799, - "step": 14285 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0294906140369542e-06, - "loss": 0.4315, - "step": 14286 - }, - { - "epoch": 2.58, - "grad_norm": 0.0, - "learning_rate": 1.0286093531271567e-06, - "loss": 0.3871, - "step": 14287 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0277284491146477e-06, - "loss": 0.457, - "step": 14288 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0268479020344723e-06, - "loss": 0.4466, - "step": 14289 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0259677119216594e-06, - "loss": 0.4475, - "step": 14290 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0250878788112261e-06, - "loss": 0.4484, - "step": 14291 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0242084027381704e-06, - "loss": 0.4624, - "step": 14292 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0233292837374775e-06, - "loss": 0.4938, - "step": 14293 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0224505218441271e-06, - "loss": 0.5122, - "step": 14294 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0215721170930737e-06, - "loss": 0.4302, - "step": 14295 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0206940695192623e-06, - "loss": 0.5353, - "step": 14296 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0198163791576233e-06, - "loss": 0.4342, - "step": 14297 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0189390460430725e-06, - "loss": 0.4644, - "step": 14298 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0180620702105126e-06, - "loss": 0.4674, - "step": 14299 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0171854516948298e-06, - "loss": 0.4278, - "step": 14300 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0163091905308987e-06, - "loss": 0.4161, - "step": 14301 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0154332867535776e-06, - "loss": 0.442, - "step": 14302 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0145577403977114e-06, - "loss": 0.4023, - "step": 14303 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0136825514981298e-06, - "loss": 0.3774, - "step": 14304 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0128077200896524e-06, - "loss": 0.4847, - "step": 14305 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0119332462070819e-06, - "loss": 0.3615, - "step": 14306 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0110591298852013e-06, - "loss": 0.446, - "step": 14307 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0101853711587882e-06, - "loss": 0.4875, - "step": 14308 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0093119700625998e-06, - "loss": 0.3701, - "step": 14309 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0084389266313855e-06, - "loss": 0.4027, - "step": 14310 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0075662408998732e-06, - "loss": 0.4633, - "step": 14311 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0066939129027808e-06, - "loss": 0.4565, - "step": 14312 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0058219426748116e-06, - "loss": 0.4123, - "step": 14313 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0049503302506524e-06, - "loss": 0.5038, - "step": 14314 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0040790756649798e-06, - "loss": 0.4401, - "step": 14315 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0032081789524517e-06, - "loss": 0.4259, - "step": 14316 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0023376401477135e-06, - "loss": 0.3974, - "step": 14317 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.001467459285399e-06, - "loss": 0.4345, - "step": 14318 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 1.0005976364001247e-06, - "loss": 0.4516, - "step": 14319 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.997281715264928e-07, - "loss": 0.4258, - "step": 14320 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.988590646990925e-07, - "loss": 0.3381, - "step": 14321 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.979903159524984e-07, - "loss": 0.4209, - "step": 14322 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.971219253212705e-07, - "loss": 0.5465, - "step": 14323 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.962538928399557e-07, - "loss": 0.3644, - "step": 14324 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.953862185430851e-07, - "loss": 0.3783, - "step": 14325 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.945189024651747e-07, - "loss": 0.4541, - "step": 14326 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.936519446407312e-07, - "loss": 0.3664, - "step": 14327 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.927853451042424e-07, - "loss": 0.445, - "step": 14328 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.919191038901843e-07, - "loss": 0.4328, - "step": 14329 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.910532210330148e-07, - "loss": 0.4208, - "step": 14330 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.901876965671787e-07, - "loss": 0.4905, - "step": 14331 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.893225305271126e-07, - "loss": 0.3742, - "step": 14332 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.884577229472326e-07, - "loss": 0.4726, - "step": 14333 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.875932738619421e-07, - "loss": 0.4781, - "step": 14334 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.867291833056303e-07, - "loss": 0.4156, - "step": 14335 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.85865451312672e-07, - "loss": 0.5035, - "step": 14336 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.850020779174285e-07, - "loss": 0.3966, - "step": 14337 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.841390631542457e-07, - "loss": 0.4277, - "step": 14338 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.832764070574551e-07, - "loss": 0.3738, - "step": 14339 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.82414109661377e-07, - "loss": 0.4179, - "step": 14340 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.815521710003129e-07, - "loss": 0.4785, - "step": 14341 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.806905911085507e-07, - "loss": 0.4421, - "step": 14342 - }, - { - "epoch": 2.59, - "grad_norm": 0.0, - "learning_rate": 9.798293700203698e-07, - "loss": 0.4516, - "step": 14343 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.789685077700318e-07, - "loss": 0.4779, - "step": 14344 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.78108004391778e-07, - "loss": 0.5037, - "step": 14345 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.77247859919842e-07, - "loss": 0.4564, - "step": 14346 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.763880743884423e-07, - "loss": 0.3572, - "step": 14347 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.75528647831785e-07, - "loss": 0.4501, - "step": 14348 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.746695802840577e-07, - "loss": 0.4326, - "step": 14349 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.73810871779436e-07, - "loss": 0.5047, - "step": 14350 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.72952522352082e-07, - "loss": 0.4536, - "step": 14351 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.720945320361374e-07, - "loss": 0.3924, - "step": 14352 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.712369008657396e-07, - "loss": 0.4142, - "step": 14353 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.703796288750056e-07, - "loss": 0.4474, - "step": 14354 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.695227160980382e-07, - "loss": 0.4276, - "step": 14355 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.686661625689264e-07, - "loss": 0.3859, - "step": 14356 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.678099683217468e-07, - "loss": 0.4352, - "step": 14357 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.669541333905597e-07, - "loss": 0.4411, - "step": 14358 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.660986578094122e-07, - "loss": 0.4151, - "step": 14359 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.652435416123351e-07, - "loss": 0.4414, - "step": 14360 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.643887848333477e-07, - "loss": 0.4674, - "step": 14361 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.63534387506454e-07, - "loss": 0.5348, - "step": 14362 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.626803496656422e-07, - "loss": 0.507, - "step": 14363 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.618266713448855e-07, - "loss": 0.4067, - "step": 14364 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.609733525781494e-07, - "loss": 0.4641, - "step": 14365 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.601203933993774e-07, - "loss": 0.3719, - "step": 14366 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.592677938425054e-07, - "loss": 0.4355, - "step": 14367 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.584155539414453e-07, - "loss": 0.4425, - "step": 14368 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.57563673730102e-07, - "loss": 0.3994, - "step": 14369 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.567121532423683e-07, - "loss": 0.3831, - "step": 14370 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.558609925121176e-07, - "loss": 0.5093, - "step": 14371 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.55010191573209e-07, - "loss": 0.4168, - "step": 14372 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.541597504594902e-07, - "loss": 0.5042, - "step": 14373 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.533096692047938e-07, - "loss": 0.4339, - "step": 14374 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.524599478429353e-07, - "loss": 0.4716, - "step": 14375 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.516105864077208e-07, - "loss": 0.4069, - "step": 14376 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.507615849329365e-07, - "loss": 0.4404, - "step": 14377 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.49912943452359e-07, - "loss": 0.388, - "step": 14378 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.49064661999749e-07, - "loss": 0.4685, - "step": 14379 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.482167406088494e-07, - "loss": 0.4279, - "step": 14380 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.473691793133966e-07, - "loss": 0.3859, - "step": 14381 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.46521978147108e-07, - "loss": 0.4027, - "step": 14382 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.456751371436812e-07, - "loss": 0.4573, - "step": 14383 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.448286563368103e-07, - "loss": 0.3776, - "step": 14384 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.439825357601651e-07, - "loss": 0.5288, - "step": 14385 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.431367754474097e-07, - "loss": 0.5336, - "step": 14386 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.422913754321894e-07, - "loss": 0.4353, - "step": 14387 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.414463357481341e-07, - "loss": 0.4422, - "step": 14388 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.406016564288644e-07, - "loss": 0.4501, - "step": 14389 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.397573375079771e-07, - "loss": 0.4667, - "step": 14390 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.389133790190652e-07, - "loss": 0.4097, - "step": 14391 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.380697809957007e-07, - "loss": 0.4521, - "step": 14392 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.372265434714456e-07, - "loss": 0.4783, - "step": 14393 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.363836664798431e-07, - "loss": 0.457, - "step": 14394 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.355411500544265e-07, - "loss": 0.3714, - "step": 14395 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.346989942287077e-07, - "loss": 0.43, - "step": 14396 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.338571990361978e-07, - "loss": 0.3846, - "step": 14397 - }, - { - "epoch": 2.6, - "grad_norm": 0.0, - "learning_rate": 9.330157645103765e-07, - "loss": 0.4031, - "step": 14398 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.321746906847218e-07, - "loss": 0.4045, - "step": 14399 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.313339775926911e-07, - "loss": 0.3933, - "step": 14400 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.304936252677288e-07, - "loss": 0.4907, - "step": 14401 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.296536337432693e-07, - "loss": 0.411, - "step": 14402 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.28814003052726e-07, - "loss": 0.4281, - "step": 14403 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.27974733229503e-07, - "loss": 0.4522, - "step": 14404 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.271358243069861e-07, - "loss": 0.4396, - "step": 14405 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.262972763185452e-07, - "loss": 0.436, - "step": 14406 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.254590892975456e-07, - "loss": 0.3563, - "step": 14407 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.246212632773288e-07, - "loss": 0.4081, - "step": 14408 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.237837982912246e-07, - "loss": 0.4428, - "step": 14409 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.229466943725496e-07, - "loss": 0.4005, - "step": 14410 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.22109951554605e-07, - "loss": 0.4528, - "step": 14411 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.212735698706776e-07, - "loss": 0.4681, - "step": 14412 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.204375493540408e-07, - "loss": 0.4488, - "step": 14413 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.196018900379522e-07, - "loss": 0.3778, - "step": 14414 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.187665919556565e-07, - "loss": 0.4897, - "step": 14415 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.179316551403816e-07, - "loss": 0.4258, - "step": 14416 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.170970796253453e-07, - "loss": 0.4184, - "step": 14417 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.162628654437445e-07, - "loss": 0.4558, - "step": 14418 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.154290126287702e-07, - "loss": 0.3946, - "step": 14419 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.145955212135937e-07, - "loss": 0.4593, - "step": 14420 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.137623912313698e-07, - "loss": 0.4568, - "step": 14421 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.12929622715244e-07, - "loss": 0.4903, - "step": 14422 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.120972156983432e-07, - "loss": 0.4524, - "step": 14423 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.112651702137853e-07, - "loss": 0.4687, - "step": 14424 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.104334862946684e-07, - "loss": 0.3686, - "step": 14425 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.096021639740793e-07, - "loss": 0.4261, - "step": 14426 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.087712032850904e-07, - "loss": 0.3643, - "step": 14427 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.07940604260753e-07, - "loss": 0.3642, - "step": 14428 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.071103669341164e-07, - "loss": 0.3907, - "step": 14429 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.062804913382061e-07, - "loss": 0.4481, - "step": 14430 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.054509775060372e-07, - "loss": 0.4131, - "step": 14431 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.046218254706074e-07, - "loss": 0.3441, - "step": 14432 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.037930352649027e-07, - "loss": 0.4401, - "step": 14433 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.029646069218912e-07, - "loss": 0.4003, - "step": 14434 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.021365404745363e-07, - "loss": 0.4283, - "step": 14435 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.013088359557732e-07, - "loss": 0.4214, - "step": 14436 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 9.004814933985317e-07, - "loss": 0.4348, - "step": 14437 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.996545128357248e-07, - "loss": 0.4618, - "step": 14438 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.988278943002482e-07, - "loss": 0.4764, - "step": 14439 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.980016378249922e-07, - "loss": 0.4441, - "step": 14440 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.97175743442823e-07, - "loss": 0.4219, - "step": 14441 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.963502111865996e-07, - "loss": 0.3719, - "step": 14442 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.955250410891569e-07, - "loss": 0.4115, - "step": 14443 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.947002331833243e-07, - "loss": 0.4318, - "step": 14444 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.938757875019155e-07, - "loss": 0.4261, - "step": 14445 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.930517040777287e-07, - "loss": 0.4295, - "step": 14446 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.922279829435454e-07, - "loss": 0.4713, - "step": 14447 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.91404624132135e-07, - "loss": 0.305, - "step": 14448 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.905816276762536e-07, - "loss": 0.4258, - "step": 14449 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.897589936086393e-07, - "loss": 0.4546, - "step": 14450 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.889367219620193e-07, - "loss": 0.4288, - "step": 14451 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.881148127691052e-07, - "loss": 0.3595, - "step": 14452 - }, - { - "epoch": 2.61, - "grad_norm": 0.0, - "learning_rate": 8.872932660625933e-07, - "loss": 0.4208, - "step": 14453 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.864720818751649e-07, - "loss": 0.4283, - "step": 14454 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.856512602394907e-07, - "loss": 0.3889, - "step": 14455 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.848308011882212e-07, - "loss": 0.4877, - "step": 14456 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.840107047540014e-07, - "loss": 0.3652, - "step": 14457 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.831909709694497e-07, - "loss": 0.4469, - "step": 14458 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.823715998671789e-07, - "loss": 0.3886, - "step": 14459 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.815525914797862e-07, - "loss": 0.3843, - "step": 14460 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.807339458398501e-07, - "loss": 0.4408, - "step": 14461 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.79915662979941e-07, - "loss": 0.4679, - "step": 14462 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.79097742932612e-07, - "loss": 0.3759, - "step": 14463 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.782801857303979e-07, - "loss": 0.4415, - "step": 14464 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.774629914058274e-07, - "loss": 0.4332, - "step": 14465 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.766461599914022e-07, - "loss": 0.4197, - "step": 14466 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.758296915196251e-07, - "loss": 0.4523, - "step": 14467 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.750135860229725e-07, - "loss": 0.4309, - "step": 14468 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.741978435339116e-07, - "loss": 0.3775, - "step": 14469 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.733824640848943e-07, - "loss": 0.5129, - "step": 14470 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.725674477083568e-07, - "loss": 0.3464, - "step": 14471 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.7175279443672e-07, - "loss": 0.4131, - "step": 14472 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.709385043023988e-07, - "loss": 0.398, - "step": 14473 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.70124577337782e-07, - "loss": 0.5137, - "step": 14474 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.69311013575248e-07, - "loss": 0.4062, - "step": 14475 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.684978130471655e-07, - "loss": 0.499, - "step": 14476 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.676849757858796e-07, - "loss": 0.4397, - "step": 14477 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.668725018237334e-07, - "loss": 0.4812, - "step": 14478 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.660603911930432e-07, - "loss": 0.417, - "step": 14479 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.65248643926121e-07, - "loss": 0.4887, - "step": 14480 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.644372600552542e-07, - "loss": 0.4118, - "step": 14481 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.636262396127215e-07, - "loss": 0.4757, - "step": 14482 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.628155826307904e-07, - "loss": 0.4107, - "step": 14483 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.620052891417086e-07, - "loss": 0.3823, - "step": 14484 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.611953591777101e-07, - "loss": 0.404, - "step": 14485 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.603857927710157e-07, - "loss": 0.3862, - "step": 14486 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.595765899538322e-07, - "loss": 0.4395, - "step": 14487 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.587677507583502e-07, - "loss": 0.5115, - "step": 14488 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.579592752167465e-07, - "loss": 0.4159, - "step": 14489 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.57151163361184e-07, - "loss": 0.466, - "step": 14490 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.563434152238115e-07, - "loss": 0.4844, - "step": 14491 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.55536030836761e-07, - "loss": 0.5419, - "step": 14492 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.547290102321514e-07, - "loss": 0.4553, - "step": 14493 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.539223534420893e-07, - "loss": 0.3757, - "step": 14494 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.531160604986666e-07, - "loss": 0.4144, - "step": 14495 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.523101314339555e-07, - "loss": 0.4724, - "step": 14496 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.51504566280017e-07, - "loss": 0.4623, - "step": 14497 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.506993650688978e-07, - "loss": 0.4784, - "step": 14498 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.498945278326331e-07, - "loss": 0.3966, - "step": 14499 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.490900546032388e-07, - "loss": 0.4031, - "step": 14500 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.482859454127191e-07, - "loss": 0.3567, - "step": 14501 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.474822002930616e-07, - "loss": 0.3969, - "step": 14502 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.46678819276242e-07, - "loss": 0.466, - "step": 14503 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.458758023942193e-07, - "loss": 0.3568, - "step": 14504 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.450731496789388e-07, - "loss": 0.47, - "step": 14505 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.442708611623318e-07, - "loss": 0.4244, - "step": 14506 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.434689368763149e-07, - "loss": 0.4225, - "step": 14507 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.426673768527893e-07, - "loss": 0.4221, - "step": 14508 - }, - { - "epoch": 2.62, - "grad_norm": 0.0, - "learning_rate": 8.418661811236429e-07, - "loss": 0.4797, - "step": 14509 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.410653497207489e-07, - "loss": 0.3563, - "step": 14510 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.402648826759652e-07, - "loss": 0.4, - "step": 14511 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.394647800211353e-07, - "loss": 0.518, - "step": 14512 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.386650417880904e-07, - "loss": 0.4159, - "step": 14513 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.378656680086439e-07, - "loss": 0.4054, - "step": 14514 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.370666587145948e-07, - "loss": 0.3828, - "step": 14515 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.362680139377321e-07, - "loss": 0.3809, - "step": 14516 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.354697337098271e-07, - "loss": 0.4999, - "step": 14517 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.346718180626378e-07, - "loss": 0.5198, - "step": 14518 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.338742670279021e-07, - "loss": 0.4072, - "step": 14519 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.3307708063735e-07, - "loss": 0.4848, - "step": 14520 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.322802589226964e-07, - "loss": 0.4166, - "step": 14521 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.31483801915639e-07, - "loss": 0.375, - "step": 14522 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.306877096478627e-07, - "loss": 0.4277, - "step": 14523 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.298919821510365e-07, - "loss": 0.471, - "step": 14524 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.290966194568173e-07, - "loss": 0.3634, - "step": 14525 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.283016215968454e-07, - "loss": 0.4022, - "step": 14526 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.275069886027465e-07, - "loss": 0.4927, - "step": 14527 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.267127205061331e-07, - "loss": 0.4191, - "step": 14528 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.259188173386023e-07, - "loss": 0.4518, - "step": 14529 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.251252791317377e-07, - "loss": 0.4671, - "step": 14530 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.24332105917105e-07, - "loss": 0.4819, - "step": 14531 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.235392977262613e-07, - "loss": 0.4671, - "step": 14532 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.227468545907458e-07, - "loss": 0.4217, - "step": 14533 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.21954776542081e-07, - "loss": 0.4468, - "step": 14534 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.211630636117773e-07, - "loss": 0.4211, - "step": 14535 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.203717158313296e-07, - "loss": 0.5068, - "step": 14536 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.195807332322225e-07, - "loss": 0.4708, - "step": 14537 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.18790115845921e-07, - "loss": 0.5029, - "step": 14538 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.179998637038766e-07, - "loss": 0.423, - "step": 14539 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.172099768375274e-07, - "loss": 0.3861, - "step": 14540 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.164204552782972e-07, - "loss": 0.458, - "step": 14541 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.156312990575921e-07, - "loss": 0.4381, - "step": 14542 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.14842508206809e-07, - "loss": 0.4457, - "step": 14543 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.140540827573251e-07, - "loss": 0.5028, - "step": 14544 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.132660227405065e-07, - "loss": 0.427, - "step": 14545 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.124783281877036e-07, - "loss": 0.3939, - "step": 14546 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.116909991302513e-07, - "loss": 0.4292, - "step": 14547 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.109040355994713e-07, - "loss": 0.4479, - "step": 14548 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.101174376266707e-07, - "loss": 0.4353, - "step": 14549 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.093312052431423e-07, - "loss": 0.3316, - "step": 14550 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.085453384801622e-07, - "loss": 0.4259, - "step": 14551 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.077598373689944e-07, - "loss": 0.3965, - "step": 14552 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.069747019408858e-07, - "loss": 0.41, - "step": 14553 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.061899322270749e-07, - "loss": 0.4859, - "step": 14554 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.054055282587769e-07, - "loss": 0.3738, - "step": 14555 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.046214900672012e-07, - "loss": 0.4317, - "step": 14556 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.038378176835326e-07, - "loss": 0.442, - "step": 14557 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.030545111389476e-07, - "loss": 0.4086, - "step": 14558 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.022715704646112e-07, - "loss": 0.3365, - "step": 14559 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.014889956916694e-07, - "loss": 0.4421, - "step": 14560 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 8.007067868512519e-07, - "loss": 0.3987, - "step": 14561 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 7.999249439744783e-07, - "loss": 0.42, - "step": 14562 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 7.991434670924513e-07, - "loss": 0.4389, - "step": 14563 - }, - { - "epoch": 2.63, - "grad_norm": 0.0, - "learning_rate": 7.983623562362585e-07, - "loss": 0.4705, - "step": 14564 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.97581611436975e-07, - "loss": 0.511, - "step": 14565 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.968012327256591e-07, - "loss": 0.4658, - "step": 14566 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.960212201333561e-07, - "loss": 0.4616, - "step": 14567 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.952415736910968e-07, - "loss": 0.4859, - "step": 14568 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.94462293429894e-07, - "loss": 0.3896, - "step": 14569 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.93683379380753e-07, - "loss": 0.4739, - "step": 14570 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.929048315746613e-07, - "loss": 0.4773, - "step": 14571 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.92126650042585e-07, - "loss": 0.4271, - "step": 14572 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.913488348154852e-07, - "loss": 0.355, - "step": 14573 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.905713859243014e-07, - "loss": 0.4683, - "step": 14574 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.897943033999667e-07, - "loss": 0.4984, - "step": 14575 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.89017587273393e-07, - "loss": 0.4073, - "step": 14576 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.882412375754789e-07, - "loss": 0.4759, - "step": 14577 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.874652543371087e-07, - "loss": 0.3826, - "step": 14578 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.86689637589153e-07, - "loss": 0.3776, - "step": 14579 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.859143873624664e-07, - "loss": 0.4679, - "step": 14580 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.851395036878895e-07, - "loss": 0.4828, - "step": 14581 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.843649865962499e-07, - "loss": 0.4542, - "step": 14582 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.835908361183587e-07, - "loss": 0.4144, - "step": 14583 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.82817052285012e-07, - "loss": 0.4535, - "step": 14584 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.820436351269911e-07, - "loss": 0.4801, - "step": 14585 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.81270584675069e-07, - "loss": 0.4762, - "step": 14586 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.804979009599944e-07, - "loss": 0.3913, - "step": 14587 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.797255840125073e-07, - "loss": 0.3192, - "step": 14588 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.789536338633308e-07, - "loss": 0.4944, - "step": 14589 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.781820505431737e-07, - "loss": 0.4605, - "step": 14590 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.774108340827347e-07, - "loss": 0.4414, - "step": 14591 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.766399845126916e-07, - "loss": 0.4793, - "step": 14592 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.758695018637108e-07, - "loss": 0.4111, - "step": 14593 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.750993861664446e-07, - "loss": 0.4448, - "step": 14594 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.743296374515241e-07, - "loss": 0.4671, - "step": 14595 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.73560255749577e-07, - "loss": 0.4349, - "step": 14596 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.727912410912097e-07, - "loss": 0.487, - "step": 14597 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.720225935070125e-07, - "loss": 0.4644, - "step": 14598 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.712543130275651e-07, - "loss": 0.409, - "step": 14599 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.70486399683431e-07, - "loss": 0.3377, - "step": 14600 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.697188535051591e-07, - "loss": 0.369, - "step": 14601 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.689516745232839e-07, - "loss": 0.4767, - "step": 14602 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.681848627683242e-07, - "loss": 0.4013, - "step": 14603 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.674184182707866e-07, - "loss": 0.3804, - "step": 14604 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.666523410611593e-07, - "loss": 0.4774, - "step": 14605 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.658866311699209e-07, - "loss": 0.4227, - "step": 14606 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.651212886275283e-07, - "loss": 0.4997, - "step": 14607 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.643563134644327e-07, - "loss": 0.4639, - "step": 14608 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.635917057110664e-07, - "loss": 0.3678, - "step": 14609 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.62827465397844e-07, - "loss": 0.3977, - "step": 14610 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.620635925551689e-07, - "loss": 0.4229, - "step": 14611 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.613000872134268e-07, - "loss": 0.3794, - "step": 14612 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.605369494029968e-07, - "loss": 0.4042, - "step": 14613 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.597741791542346e-07, - "loss": 0.4471, - "step": 14614 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.590117764974859e-07, - "loss": 0.3542, - "step": 14615 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.582497414630796e-07, - "loss": 0.4767, - "step": 14616 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.574880740813273e-07, - "loss": 0.4737, - "step": 14617 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.567267743825358e-07, - "loss": 0.4553, - "step": 14618 - }, - { - "epoch": 2.64, - "grad_norm": 0.0, - "learning_rate": 7.559658423969863e-07, - "loss": 0.5151, - "step": 14619 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.552052781549524e-07, - "loss": 0.4835, - "step": 14620 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.544450816866899e-07, - "loss": 0.4856, - "step": 14621 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.536852530224403e-07, - "loss": 0.448, - "step": 14622 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.529257921924294e-07, - "loss": 0.469, - "step": 14623 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.521666992268739e-07, - "loss": 0.4354, - "step": 14624 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.514079741559699e-07, - "loss": 0.4863, - "step": 14625 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.506496170098987e-07, - "loss": 0.4076, - "step": 14626 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.498916278188318e-07, - "loss": 0.4598, - "step": 14627 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.491340066129204e-07, - "loss": 0.435, - "step": 14628 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.483767534223063e-07, - "loss": 0.4948, - "step": 14629 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.476198682771152e-07, - "loss": 0.4347, - "step": 14630 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.468633512074552e-07, - "loss": 0.4194, - "step": 14631 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.461072022434257e-07, - "loss": 0.3674, - "step": 14632 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.453514214150992e-07, - "loss": 0.4326, - "step": 14633 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.445960087525505e-07, - "loss": 0.3943, - "step": 14634 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.438409642858268e-07, - "loss": 0.4287, - "step": 14635 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.430862880449674e-07, - "loss": 0.4366, - "step": 14636 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.423319800599926e-07, - "loss": 0.554, - "step": 14637 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.415780403609108e-07, - "loss": 0.4619, - "step": 14638 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.408244689777155e-07, - "loss": 0.4369, - "step": 14639 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.400712659403841e-07, - "loss": 0.3885, - "step": 14640 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.393184312788815e-07, - "loss": 0.4952, - "step": 14641 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.385659650231558e-07, - "loss": 0.3507, - "step": 14642 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.378138672031421e-07, - "loss": 0.5548, - "step": 14643 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.370621378487597e-07, - "loss": 0.4787, - "step": 14644 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.363107769899114e-07, - "loss": 0.4751, - "step": 14645 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.355597846564922e-07, - "loss": 0.4182, - "step": 14646 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.348091608783769e-07, - "loss": 0.4688, - "step": 14647 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.340589056854241e-07, - "loss": 0.4807, - "step": 14648 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.333090191074821e-07, - "loss": 0.4338, - "step": 14649 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.325595011743791e-07, - "loss": 0.4432, - "step": 14650 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.318103519159381e-07, - "loss": 0.4179, - "step": 14651 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.310615713619573e-07, - "loss": 0.3961, - "step": 14652 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.303131595422263e-07, - "loss": 0.3822, - "step": 14653 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.295651164865192e-07, - "loss": 0.5221, - "step": 14654 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.288174422245897e-07, - "loss": 0.3769, - "step": 14655 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.280701367861864e-07, - "loss": 0.5217, - "step": 14656 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.273232002010366e-07, - "loss": 0.4392, - "step": 14657 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.265766324988555e-07, - "loss": 0.4689, - "step": 14658 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.258304337093414e-07, - "loss": 0.4257, - "step": 14659 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.250846038621806e-07, - "loss": 0.4918, - "step": 14660 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.243391429870417e-07, - "loss": 0.4339, - "step": 14661 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.235940511135853e-07, - "loss": 0.4472, - "step": 14662 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.228493282714466e-07, - "loss": 0.4184, - "step": 14663 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.221049744902542e-07, - "loss": 0.381, - "step": 14664 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.213609897996199e-07, - "loss": 0.4114, - "step": 14665 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.206173742291389e-07, - "loss": 0.3713, - "step": 14666 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.198741278083976e-07, - "loss": 0.4079, - "step": 14667 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.191312505669601e-07, - "loss": 0.4219, - "step": 14668 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.183887425343827e-07, - "loss": 0.5164, - "step": 14669 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.176466037401996e-07, - "loss": 0.4364, - "step": 14670 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.16904834213935e-07, - "loss": 0.4263, - "step": 14671 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.161634339851009e-07, - "loss": 0.4825, - "step": 14672 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.15422403083188e-07, - "loss": 0.4878, - "step": 14673 - }, - { - "epoch": 2.65, - "grad_norm": 0.0, - "learning_rate": 7.146817415376784e-07, - "loss": 0.4391, - "step": 14674 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.13941449378035e-07, - "loss": 0.4219, - "step": 14675 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.1320152663371e-07, - "loss": 0.4261, - "step": 14676 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.124619733341365e-07, - "loss": 0.5294, - "step": 14677 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.117227895087353e-07, - "loss": 0.4565, - "step": 14678 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.109839751869141e-07, - "loss": 0.4214, - "step": 14679 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.102455303980638e-07, - "loss": 0.4533, - "step": 14680 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.095074551715597e-07, - "loss": 0.4163, - "step": 14681 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.08769749536764e-07, - "loss": 0.4788, - "step": 14682 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.080324135230255e-07, - "loss": 0.4552, - "step": 14683 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.072954471596749e-07, - "loss": 0.4874, - "step": 14684 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.065588504760324e-07, - "loss": 0.4003, - "step": 14685 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.058226235013987e-07, - "loss": 0.4129, - "step": 14686 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.050867662650606e-07, - "loss": 0.5026, - "step": 14687 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.043512787962958e-07, - "loss": 0.4377, - "step": 14688 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.036161611243619e-07, - "loss": 0.4207, - "step": 14689 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.028814132785022e-07, - "loss": 0.4501, - "step": 14690 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.021470352879467e-07, - "loss": 0.4231, - "step": 14691 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.014130271819097e-07, - "loss": 0.3691, - "step": 14692 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 7.006793889895924e-07, - "loss": 0.3992, - "step": 14693 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.999461207401803e-07, - "loss": 0.4618, - "step": 14694 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.992132224628423e-07, - "loss": 0.4729, - "step": 14695 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.984806941867362e-07, - "loss": 0.3755, - "step": 14696 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.977485359410019e-07, - "loss": 0.4433, - "step": 14697 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.970167477547662e-07, - "loss": 0.4446, - "step": 14698 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.962853296571404e-07, - "loss": 0.439, - "step": 14699 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.955542816772254e-07, - "loss": 0.4843, - "step": 14700 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.948236038440992e-07, - "loss": 0.3948, - "step": 14701 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.940932961868296e-07, - "loss": 0.4257, - "step": 14702 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.933633587344712e-07, - "loss": 0.5457, - "step": 14703 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.926337915160597e-07, - "loss": 0.4325, - "step": 14704 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.919045945606229e-07, - "loss": 0.4576, - "step": 14705 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.911757678971664e-07, - "loss": 0.4711, - "step": 14706 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.904473115546862e-07, - "loss": 0.4062, - "step": 14707 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.897192255621577e-07, - "loss": 0.4855, - "step": 14708 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.889915099485478e-07, - "loss": 0.3823, - "step": 14709 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.882641647428068e-07, - "loss": 0.4328, - "step": 14710 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.875371899738692e-07, - "loss": 0.4349, - "step": 14711 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.868105856706564e-07, - "loss": 0.371, - "step": 14712 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.860843518620719e-07, - "loss": 0.3158, - "step": 14713 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.85358488577007e-07, - "loss": 0.4403, - "step": 14714 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.846329958443398e-07, - "loss": 0.4179, - "step": 14715 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.839078736929294e-07, - "loss": 0.3802, - "step": 14716 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.831831221516228e-07, - "loss": 0.4171, - "step": 14717 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.824587412492522e-07, - "loss": 0.4716, - "step": 14718 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.81734731014635e-07, - "loss": 0.4115, - "step": 14719 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.810110914765722e-07, - "loss": 0.4596, - "step": 14720 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.802878226638532e-07, - "loss": 0.4079, - "step": 14721 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.795649246052516e-07, - "loss": 0.4802, - "step": 14722 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.788423973295233e-07, - "loss": 0.4572, - "step": 14723 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.781202408654119e-07, - "loss": 0.4121, - "step": 14724 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.773984552416458e-07, - "loss": 0.4224, - "step": 14725 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.766770404869405e-07, - "loss": 0.4836, - "step": 14726 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.759559966299944e-07, - "loss": 0.3885, - "step": 14727 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.752353236994924e-07, - "loss": 0.4816, - "step": 14728 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.745150217241026e-07, - "loss": 0.4693, - "step": 14729 - }, - { - "epoch": 2.66, - "grad_norm": 0.0, - "learning_rate": 6.737950907324808e-07, - "loss": 0.4405, - "step": 14730 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.730755307532666e-07, - "loss": 0.427, - "step": 14731 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.723563418150869e-07, - "loss": 0.3871, - "step": 14732 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.7163752394655e-07, - "loss": 0.4317, - "step": 14733 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.70919077176253e-07, - "loss": 0.3947, - "step": 14734 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.702010015327764e-07, - "loss": 0.4631, - "step": 14735 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.694832970446874e-07, - "loss": 0.4859, - "step": 14736 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.687659637405352e-07, - "loss": 0.512, - "step": 14737 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.680490016488616e-07, - "loss": 0.4785, - "step": 14738 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.673324107981838e-07, - "loss": 0.4405, - "step": 14739 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.666161912170099e-07, - "loss": 0.4479, - "step": 14740 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.659003429338329e-07, - "loss": 0.4259, - "step": 14741 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.651848659771287e-07, - "loss": 0.3916, - "step": 14742 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.644697603753635e-07, - "loss": 0.46, - "step": 14743 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.637550261569848e-07, - "loss": 0.416, - "step": 14744 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.630406633504261e-07, - "loss": 0.5137, - "step": 14745 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.623266719841026e-07, - "loss": 0.4664, - "step": 14746 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.616130520864194e-07, - "loss": 0.379, - "step": 14747 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.608998036857684e-07, - "loss": 0.4618, - "step": 14748 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.601869268105221e-07, - "loss": 0.402, - "step": 14749 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.594744214890392e-07, - "loss": 0.4156, - "step": 14750 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.587622877496658e-07, - "loss": 0.3846, - "step": 14751 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.580505256207303e-07, - "loss": 0.4174, - "step": 14752 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.573391351305492e-07, - "loss": 0.4873, - "step": 14753 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.566281163074217e-07, - "loss": 0.3411, - "step": 14754 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.559174691796332e-07, - "loss": 0.4677, - "step": 14755 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.552071937754567e-07, - "loss": 0.4106, - "step": 14756 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.54497290123145e-07, - "loss": 0.4606, - "step": 14757 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.5378775825094e-07, - "loss": 0.4567, - "step": 14758 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.530785981870702e-07, - "loss": 0.4173, - "step": 14759 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.523698099597476e-07, - "loss": 0.4036, - "step": 14760 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.51661393597166e-07, - "loss": 0.3449, - "step": 14761 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.509533491275088e-07, - "loss": 0.4198, - "step": 14762 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.502456765789411e-07, - "loss": 0.418, - "step": 14763 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.495383759796192e-07, - "loss": 0.4472, - "step": 14764 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.488314473576795e-07, - "loss": 0.4165, - "step": 14765 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.481248907412429e-07, - "loss": 0.3744, - "step": 14766 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.474187061584203e-07, - "loss": 0.4108, - "step": 14767 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.467128936373024e-07, - "loss": 0.4352, - "step": 14768 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.460074532059691e-07, - "loss": 0.3607, - "step": 14769 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.453023848924833e-07, - "loss": 0.4365, - "step": 14770 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.44597688724895e-07, - "loss": 0.465, - "step": 14771 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.438933647312362e-07, - "loss": 0.3968, - "step": 14772 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.431894129395277e-07, - "loss": 0.4211, - "step": 14773 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.424858333777728e-07, - "loss": 0.3854, - "step": 14774 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.417826260739635e-07, - "loss": 0.4658, - "step": 14775 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.410797910560717e-07, - "loss": 0.4578, - "step": 14776 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.403773283520587e-07, - "loss": 0.4018, - "step": 14777 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.396752379898697e-07, - "loss": 0.4, - "step": 14778 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.389735199974334e-07, - "loss": 0.4044, - "step": 14779 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.382721744026677e-07, - "loss": 0.415, - "step": 14780 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.375712012334722e-07, - "loss": 0.3751, - "step": 14781 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.368706005177338e-07, - "loss": 0.4944, - "step": 14782 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.361703722833246e-07, - "loss": 0.4544, - "step": 14783 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.354705165580965e-07, - "loss": 0.4457, - "step": 14784 - }, - { - "epoch": 2.67, - "grad_norm": 0.0, - "learning_rate": 6.347710333698931e-07, - "loss": 0.3942, - "step": 14785 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.340719227465431e-07, - "loss": 0.47, - "step": 14786 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.333731847158553e-07, - "loss": 0.4405, - "step": 14787 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.326748193056298e-07, - "loss": 0.457, - "step": 14788 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.319768265436465e-07, - "loss": 0.439, - "step": 14789 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.312792064576733e-07, - "loss": 0.3802, - "step": 14790 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.305819590754625e-07, - "loss": 0.4397, - "step": 14791 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.298850844247518e-07, - "loss": 0.4093, - "step": 14792 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.291885825332655e-07, - "loss": 0.3747, - "step": 14793 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.284924534287096e-07, - "loss": 0.4223, - "step": 14794 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.277966971387783e-07, - "loss": 0.3606, - "step": 14795 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.271013136911486e-07, - "loss": 0.4722, - "step": 14796 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.26406303113487e-07, - "loss": 0.3745, - "step": 14797 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.257116654334416e-07, - "loss": 0.4061, - "step": 14798 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.250174006786436e-07, - "loss": 0.482, - "step": 14799 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.243235088767141e-07, - "loss": 0.4371, - "step": 14800 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.236299900552545e-07, - "loss": 0.4987, - "step": 14801 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.229368442418593e-07, - "loss": 0.3749, - "step": 14802 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.222440714640987e-07, - "loss": 0.5118, - "step": 14803 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.215516717495351e-07, - "loss": 0.4759, - "step": 14804 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.208596451257121e-07, - "loss": 0.3819, - "step": 14805 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.201679916201586e-07, - "loss": 0.4665, - "step": 14806 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.194767112603928e-07, - "loss": 0.4932, - "step": 14807 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.187858040739114e-07, - "loss": 0.4266, - "step": 14808 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.180952700882026e-07, - "loss": 0.4813, - "step": 14809 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.174051093307365e-07, - "loss": 0.4542, - "step": 14810 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.167153218289667e-07, - "loss": 0.4376, - "step": 14811 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.160259076103359e-07, - "loss": 0.4427, - "step": 14812 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.15336866702273e-07, - "loss": 0.4724, - "step": 14813 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.14648199132184e-07, - "loss": 0.4422, - "step": 14814 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.13959904927468e-07, - "loss": 0.5024, - "step": 14815 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.132719841155066e-07, - "loss": 0.5059, - "step": 14816 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.125844367236644e-07, - "loss": 0.4595, - "step": 14817 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.118972627792963e-07, - "loss": 0.4839, - "step": 14818 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.112104623097382e-07, - "loss": 0.4625, - "step": 14819 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.105240353423103e-07, - "loss": 0.4126, - "step": 14820 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.098379819043243e-07, - "loss": 0.4175, - "step": 14821 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.091523020230661e-07, - "loss": 0.4431, - "step": 14822 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.084669957258182e-07, - "loss": 0.4517, - "step": 14823 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.077820630398423e-07, - "loss": 0.4586, - "step": 14824 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.070975039923854e-07, - "loss": 0.4311, - "step": 14825 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.064133186106802e-07, - "loss": 0.432, - "step": 14826 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.05729506921946e-07, - "loss": 0.5132, - "step": 14827 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.050460689533844e-07, - "loss": 0.5182, - "step": 14828 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.043630047321858e-07, - "loss": 0.4202, - "step": 14829 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.036803142855219e-07, - "loss": 0.4697, - "step": 14830 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.02997997640552e-07, - "loss": 0.3945, - "step": 14831 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.023160548244189e-07, - "loss": 0.4121, - "step": 14832 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.016344858642531e-07, - "loss": 0.4237, - "step": 14833 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.009532907871663e-07, - "loss": 0.4349, - "step": 14834 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 6.0027246962026e-07, - "loss": 0.3792, - "step": 14835 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 5.995920223906193e-07, - "loss": 0.3581, - "step": 14836 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 5.989119491253104e-07, - "loss": 0.4805, - "step": 14837 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 5.982322498513893e-07, - "loss": 0.3995, - "step": 14838 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 5.975529245958933e-07, - "loss": 0.376, - "step": 14839 - }, - { - "epoch": 2.68, - "grad_norm": 0.0, - "learning_rate": 5.96873973385852e-07, - "loss": 0.3663, - "step": 14840 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.961953962482714e-07, - "loss": 0.4636, - "step": 14841 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.955171932101478e-07, - "loss": 0.4385, - "step": 14842 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.948393642984607e-07, - "loss": 0.4422, - "step": 14843 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.941619095401763e-07, - "loss": 0.3976, - "step": 14844 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.93484828962243e-07, - "loss": 0.4464, - "step": 14845 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.928081225915983e-07, - "loss": 0.495, - "step": 14846 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.921317904551615e-07, - "loss": 0.4887, - "step": 14847 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.914558325798392e-07, - "loss": 0.3594, - "step": 14848 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.907802489925218e-07, - "loss": 0.3847, - "step": 14849 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.901050397200836e-07, - "loss": 0.4354, - "step": 14850 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.894302047893885e-07, - "loss": 0.4912, - "step": 14851 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.887557442272807e-07, - "loss": 0.3736, - "step": 14852 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.88081658060592e-07, - "loss": 0.3909, - "step": 14853 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.874079463161375e-07, - "loss": 0.3794, - "step": 14854 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.867346090207193e-07, - "loss": 0.4438, - "step": 14855 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.860616462011248e-07, - "loss": 0.5013, - "step": 14856 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.853890578841248e-07, - "loss": 0.4754, - "step": 14857 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.847168440964767e-07, - "loss": 0.4162, - "step": 14858 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.840450048649226e-07, - "loss": 0.4257, - "step": 14859 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.833735402161867e-07, - "loss": 0.4062, - "step": 14860 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.82702450176984e-07, - "loss": 0.4581, - "step": 14861 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.8203173477401e-07, - "loss": 0.4117, - "step": 14862 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.81361394033948e-07, - "loss": 0.3662, - "step": 14863 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.806914279834652e-07, - "loss": 0.4102, - "step": 14864 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.800218366492127e-07, - "loss": 0.4822, - "step": 14865 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.793526200578293e-07, - "loss": 0.3994, - "step": 14866 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.786837782359367e-07, - "loss": 0.4431, - "step": 14867 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.780153112101439e-07, - "loss": 0.4787, - "step": 14868 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.773472190070417e-07, - "loss": 0.4861, - "step": 14869 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.766795016532101e-07, - "loss": 0.4179, - "step": 14870 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.7601215917521e-07, - "loss": 0.4288, - "step": 14871 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.7534519159959e-07, - "loss": 0.4936, - "step": 14872 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.746785989528847e-07, - "loss": 0.4183, - "step": 14873 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.740123812616116e-07, - "loss": 0.4667, - "step": 14874 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.733465385522729e-07, - "loss": 0.4423, - "step": 14875 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.726810708513586e-07, - "loss": 0.4504, - "step": 14876 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.720159781853385e-07, - "loss": 0.4064, - "step": 14877 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.713512605806759e-07, - "loss": 0.5004, - "step": 14878 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.706869180638119e-07, - "loss": 0.4001, - "step": 14879 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.700229506611754e-07, - "loss": 0.4261, - "step": 14880 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.693593583991819e-07, - "loss": 0.52, - "step": 14881 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.686961413042258e-07, - "loss": 0.4917, - "step": 14882 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.680332994026961e-07, - "loss": 0.4478, - "step": 14883 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.673708327209593e-07, - "loss": 0.5352, - "step": 14884 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.6670874128537e-07, - "loss": 0.4414, - "step": 14885 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.660470251222661e-07, - "loss": 0.4132, - "step": 14886 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.653856842579731e-07, - "loss": 0.466, - "step": 14887 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.647247187187988e-07, - "loss": 0.4897, - "step": 14888 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.640641285310421e-07, - "loss": 0.3804, - "step": 14889 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.634039137209769e-07, - "loss": 0.4406, - "step": 14890 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.627440743148704e-07, - "loss": 0.4032, - "step": 14891 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.62084610338971e-07, - "loss": 0.4154, - "step": 14892 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.61425521819512e-07, - "loss": 0.3812, - "step": 14893 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.607668087827168e-07, - "loss": 0.4323, - "step": 14894 - }, - { - "epoch": 2.69, - "grad_norm": 0.0, - "learning_rate": 5.601084712547889e-07, - "loss": 0.4758, - "step": 14895 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.594505092619173e-07, - "loss": 0.3434, - "step": 14896 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.58792922830278e-07, - "loss": 0.4807, - "step": 14897 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.581357119860264e-07, - "loss": 0.4645, - "step": 14898 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.57478876755313e-07, - "loss": 0.4496, - "step": 14899 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.568224171642667e-07, - "loss": 0.3705, - "step": 14900 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.561663332389999e-07, - "loss": 0.3979, - "step": 14901 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.555106250056164e-07, - "loss": 0.3908, - "step": 14902 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.548552924901984e-07, - "loss": 0.4804, - "step": 14903 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.542003357188174e-07, - "loss": 0.3557, - "step": 14904 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.535457547175294e-07, - "loss": 0.428, - "step": 14905 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.528915495123733e-07, - "loss": 0.3897, - "step": 14906 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.52237720129375e-07, - "loss": 0.4761, - "step": 14907 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.515842665945447e-07, - "loss": 0.4591, - "step": 14908 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.509311889338776e-07, - "loss": 0.3736, - "step": 14909 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.502784871733569e-07, - "loss": 0.4227, - "step": 14910 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.496261613389453e-07, - "loss": 0.4304, - "step": 14911 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.489742114565966e-07, - "loss": 0.3736, - "step": 14912 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.483226375522421e-07, - "loss": 0.5362, - "step": 14913 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.476714396518035e-07, - "loss": 0.448, - "step": 14914 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.470206177811888e-07, - "loss": 0.4034, - "step": 14915 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.463701719662884e-07, - "loss": 0.3909, - "step": 14916 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.457201022329773e-07, - "loss": 0.4041, - "step": 14917 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.450704086071168e-07, - "loss": 0.5275, - "step": 14918 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.444210911145531e-07, - "loss": 0.4539, - "step": 14919 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.437721497811154e-07, - "loss": 0.3511, - "step": 14920 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.431235846326222e-07, - "loss": 0.4805, - "step": 14921 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.424753956948725e-07, - "loss": 0.4041, - "step": 14922 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.418275829936537e-07, - "loss": 0.4074, - "step": 14923 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.411801465547362e-07, - "loss": 0.5064, - "step": 14924 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.40533086403876e-07, - "loss": 0.4134, - "step": 14925 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.398864025668138e-07, - "loss": 0.443, - "step": 14926 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.392400950692789e-07, - "loss": 0.4222, - "step": 14927 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.385941639369796e-07, - "loss": 0.3897, - "step": 14928 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.379486091956121e-07, - "loss": 0.4743, - "step": 14929 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.373034308708581e-07, - "loss": 0.4321, - "step": 14930 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.366586289883835e-07, - "loss": 0.4911, - "step": 14931 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.360142035738414e-07, - "loss": 0.4548, - "step": 14932 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.353701546528667e-07, - "loss": 0.4419, - "step": 14933 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.347264822510822e-07, - "loss": 0.4872, - "step": 14934 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.34083186394092e-07, - "loss": 0.3819, - "step": 14935 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.334402671074868e-07, - "loss": 0.3755, - "step": 14936 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.327977244168469e-07, - "loss": 0.4046, - "step": 14937 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.321555583477322e-07, - "loss": 0.4983, - "step": 14938 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.315137689256878e-07, - "loss": 0.409, - "step": 14939 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.308723561762463e-07, - "loss": 0.3985, - "step": 14940 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.302313201249243e-07, - "loss": 0.4851, - "step": 14941 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.295906607972223e-07, - "loss": 0.4273, - "step": 14942 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.28950378218629e-07, - "loss": 0.3947, - "step": 14943 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.283104724146126e-07, - "loss": 0.3949, - "step": 14944 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.276709434106331e-07, - "loss": 0.4354, - "step": 14945 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.270317912321299e-07, - "loss": 0.4096, - "step": 14946 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.263930159045283e-07, - "loss": 0.4434, - "step": 14947 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.257546174532435e-07, - "loss": 0.422, - "step": 14948 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.251165959036697e-07, - "loss": 0.3585, - "step": 14949 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.24478951281191e-07, - "loss": 0.4113, - "step": 14950 - }, - { - "epoch": 2.7, - "grad_norm": 0.0, - "learning_rate": 5.238416836111715e-07, - "loss": 0.4366, - "step": 14951 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.23204792918961e-07, - "loss": 0.3958, - "step": 14952 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.225682792299003e-07, - "loss": 0.4491, - "step": 14953 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.219321425693091e-07, - "loss": 0.4323, - "step": 14954 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.21296382962494e-07, - "loss": 0.4858, - "step": 14955 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.206610004347468e-07, - "loss": 0.4542, - "step": 14956 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.200259950113429e-07, - "loss": 0.371, - "step": 14957 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.193913667175466e-07, - "loss": 0.3696, - "step": 14958 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.187571155786019e-07, - "loss": 0.4724, - "step": 14959 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.181232416197423e-07, - "loss": 0.4179, - "step": 14960 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.174897448661831e-07, - "loss": 0.4642, - "step": 14961 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.168566253431262e-07, - "loss": 0.4384, - "step": 14962 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.162238830757583e-07, - "loss": 0.4526, - "step": 14963 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.15591518089249e-07, - "loss": 0.4954, - "step": 14964 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.149595304087607e-07, - "loss": 0.455, - "step": 14965 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.143279200594286e-07, - "loss": 0.4175, - "step": 14966 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.136966870663828e-07, - "loss": 0.4666, - "step": 14967 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.130658314547332e-07, - "loss": 0.4115, - "step": 14968 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.124353532495752e-07, - "loss": 0.4414, - "step": 14969 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.118052524759931e-07, - "loss": 0.44, - "step": 14970 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.111755291590526e-07, - "loss": 0.4514, - "step": 14971 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.105461833238068e-07, - "loss": 0.4633, - "step": 14972 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.09917214995288e-07, - "loss": 0.3874, - "step": 14973 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.092886241985184e-07, - "loss": 0.3559, - "step": 14974 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.08660410958508e-07, - "loss": 0.4761, - "step": 14975 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.080325753002446e-07, - "loss": 0.3963, - "step": 14976 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.074051172487071e-07, - "loss": 0.4134, - "step": 14977 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.067780368288555e-07, - "loss": 0.4692, - "step": 14978 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.061513340656366e-07, - "loss": 0.4047, - "step": 14979 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.055250089839814e-07, - "loss": 0.5028, - "step": 14980 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.048990616088057e-07, - "loss": 0.4825, - "step": 14981 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.042734919650117e-07, - "loss": 0.4273, - "step": 14982 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.036483000774861e-07, - "loss": 0.4194, - "step": 14983 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.03023485971098e-07, - "loss": 0.4378, - "step": 14984 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.023990496707043e-07, - "loss": 0.4618, - "step": 14985 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.017749912011482e-07, - "loss": 0.4277, - "step": 14986 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.011513105872546e-07, - "loss": 0.4848, - "step": 14987 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 5.005280078538344e-07, - "loss": 0.4274, - "step": 14988 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.999050830256824e-07, - "loss": 0.4704, - "step": 14989 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.992825361275799e-07, - "loss": 0.4979, - "step": 14990 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.986603671842949e-07, - "loss": 0.4241, - "step": 14991 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.980385762205775e-07, - "loss": 0.448, - "step": 14992 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.974171632611624e-07, - "loss": 0.506, - "step": 14993 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.967961283307721e-07, - "loss": 0.4055, - "step": 14994 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.961754714541122e-07, - "loss": 0.4912, - "step": 14995 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.955551926558722e-07, - "loss": 0.4268, - "step": 14996 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.949352919607286e-07, - "loss": 0.43, - "step": 14997 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.94315769393342e-07, - "loss": 0.3377, - "step": 14998 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.936966249783592e-07, - "loss": 0.4485, - "step": 14999 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.930778587404094e-07, - "loss": 0.3854, - "step": 15000 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.924594707041075e-07, - "loss": 0.4597, - "step": 15001 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.918414608940558e-07, - "loss": 0.489, - "step": 15002 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.912238293348414e-07, - "loss": 0.3727, - "step": 15003 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.906065760510304e-07, - "loss": 0.397, - "step": 15004 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.899897010671806e-07, - "loss": 0.4101, - "step": 15005 - }, - { - "epoch": 2.71, - "grad_norm": 0.0, - "learning_rate": 4.893732044078303e-07, - "loss": 0.3953, - "step": 15006 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.887570860975088e-07, - "loss": 0.4035, - "step": 15007 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.881413461607232e-07, - "loss": 0.4213, - "step": 15008 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.875259846219704e-07, - "loss": 0.5161, - "step": 15009 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.86911001505731e-07, - "loss": 0.4091, - "step": 15010 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.862963968364653e-07, - "loss": 0.5226, - "step": 15011 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.856821706386283e-07, - "loss": 0.3877, - "step": 15012 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.850683229366538e-07, - "loss": 0.4491, - "step": 15013 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.844548537549609e-07, - "loss": 0.4089, - "step": 15014 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.838417631179559e-07, - "loss": 0.4803, - "step": 15015 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.832290510500271e-07, - "loss": 0.3672, - "step": 15016 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.826167175755503e-07, - "loss": 0.4417, - "step": 15017 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.82004762718884e-07, - "loss": 0.4657, - "step": 15018 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.813931865043731e-07, - "loss": 0.4705, - "step": 15019 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.807819889563481e-07, - "loss": 0.4785, - "step": 15020 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.80171170099123e-07, - "loss": 0.4807, - "step": 15021 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.795607299569971e-07, - "loss": 0.4736, - "step": 15022 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.789506685542533e-07, - "loss": 0.4567, - "step": 15023 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.783409859151634e-07, - "loss": 0.4091, - "step": 15024 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.777316820639822e-07, - "loss": 0.4404, - "step": 15025 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.771227570249459e-07, - "loss": 0.4249, - "step": 15026 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.765142108222798e-07, - "loss": 0.4665, - "step": 15027 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.759060434801921e-07, - "loss": 0.4409, - "step": 15028 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.752982550228791e-07, - "loss": 0.4759, - "step": 15029 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7469084547451804e-07, - "loss": 0.4218, - "step": 15030 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7408381485927304e-07, - "loss": 0.3727, - "step": 15031 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7347716320129135e-07, - "loss": 0.4791, - "step": 15032 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7287089052470925e-07, - "loss": 0.5213, - "step": 15033 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7226499685364413e-07, - "loss": 0.5031, - "step": 15034 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.716594822121989e-07, - "loss": 0.4043, - "step": 15035 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7105434662446214e-07, - "loss": 0.4377, - "step": 15036 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.7044959011450787e-07, - "loss": 0.3683, - "step": 15037 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.698452127063946e-07, - "loss": 0.4335, - "step": 15038 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.69241214424162e-07, - "loss": 0.4584, - "step": 15039 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6863759529184425e-07, - "loss": 0.3634, - "step": 15040 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.68034355333451e-07, - "loss": 0.442, - "step": 15041 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.674314945729785e-07, - "loss": 0.4594, - "step": 15042 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6682901303441217e-07, - "loss": 0.4802, - "step": 15043 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6622691074171724e-07, - "loss": 0.388, - "step": 15044 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6562518771885e-07, - "loss": 0.3691, - "step": 15045 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6502384398974586e-07, - "loss": 0.4333, - "step": 15046 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6442287957832677e-07, - "loss": 0.5227, - "step": 15047 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6382229450850357e-07, - "loss": 0.4519, - "step": 15048 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6322208880416164e-07, - "loss": 0.3808, - "step": 15049 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.626222624891852e-07, - "loss": 0.4738, - "step": 15050 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.620228155874329e-07, - "loss": 0.4832, - "step": 15051 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.614237481227524e-07, - "loss": 0.4617, - "step": 15052 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6082506011897563e-07, - "loss": 0.4576, - "step": 15053 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.6022675159991924e-07, - "loss": 0.3942, - "step": 15054 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.5962882258938634e-07, - "loss": 0.4608, - "step": 15055 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.5903127311116123e-07, - "loss": 0.3906, - "step": 15056 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.5843410318901717e-07, - "loss": 0.4437, - "step": 15057 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.578373128467106e-07, - "loss": 0.4467, - "step": 15058 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.5724090210798153e-07, - "loss": 0.3159, - "step": 15059 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.566448709965565e-07, - "loss": 0.4077, - "step": 15060 - }, - { - "epoch": 2.72, - "grad_norm": 0.0, - "learning_rate": 4.5604921953614765e-07, - "loss": 0.3971, - "step": 15061 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.554539477504505e-07, - "loss": 0.4239, - "step": 15062 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.5485905566314823e-07, - "loss": 0.3518, - "step": 15063 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.54264543297902e-07, - "loss": 0.3696, - "step": 15064 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.5367041067836626e-07, - "loss": 0.4228, - "step": 15065 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.530766578281731e-07, - "loss": 0.5292, - "step": 15066 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.5248328477094704e-07, - "loss": 0.4856, - "step": 15067 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.518902915302914e-07, - "loss": 0.4217, - "step": 15068 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.5129767812979617e-07, - "loss": 0.4685, - "step": 15069 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.5070544459303813e-07, - "loss": 0.4109, - "step": 15070 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.501135909435761e-07, - "loss": 0.5235, - "step": 15071 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4952211720495574e-07, - "loss": 0.4979, - "step": 15072 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.48931023400706e-07, - "loss": 0.4272, - "step": 15073 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4834030955434147e-07, - "loss": 0.5134, - "step": 15074 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.477499756893633e-07, - "loss": 0.3788, - "step": 15075 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.471600218292549e-07, - "loss": 0.5302, - "step": 15076 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4657044799748415e-07, - "loss": 0.4123, - "step": 15077 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4598125421750903e-07, - "loss": 0.4041, - "step": 15078 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4539244051276633e-07, - "loss": 0.4207, - "step": 15079 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4480400690667946e-07, - "loss": 0.4415, - "step": 15080 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4421595342265867e-07, - "loss": 0.4739, - "step": 15081 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.436282800840952e-07, - "loss": 0.425, - "step": 15082 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.430409869143715e-07, - "loss": 0.4391, - "step": 15083 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4245407393684994e-07, - "loss": 0.448, - "step": 15084 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.418675411748774e-07, - "loss": 0.3928, - "step": 15085 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.4128138865178973e-07, - "loss": 0.4462, - "step": 15086 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.406956163909004e-07, - "loss": 0.4043, - "step": 15087 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.401102244155175e-07, - "loss": 0.3961, - "step": 15088 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.395252127489258e-07, - "loss": 0.497, - "step": 15089 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.389405814143999e-07, - "loss": 0.3768, - "step": 15090 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.383563304351968e-07, - "loss": 0.4452, - "step": 15091 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3777245983455897e-07, - "loss": 0.4434, - "step": 15092 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3718896963571345e-07, - "loss": 0.4017, - "step": 15093 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3660585986187276e-07, - "loss": 0.4715, - "step": 15094 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.360231305362328e-07, - "loss": 0.4264, - "step": 15095 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.354407816819783e-07, - "loss": 0.4403, - "step": 15096 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3485881332227417e-07, - "loss": 0.371, - "step": 15097 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3427722548027074e-07, - "loss": 0.3964, - "step": 15098 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.336960181791072e-07, - "loss": 0.3983, - "step": 15099 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3311519144190516e-07, - "loss": 0.3837, - "step": 15100 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3253474529177053e-07, - "loss": 0.3852, - "step": 15101 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.319546797517926e-07, - "loss": 0.458, - "step": 15102 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.313749948450463e-07, - "loss": 0.4005, - "step": 15103 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.307956905945965e-07, - "loss": 0.3892, - "step": 15104 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.3021676702348803e-07, - "loss": 0.4728, - "step": 15105 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.296382241547492e-07, - "loss": 0.4172, - "step": 15106 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2906006201139716e-07, - "loss": 0.4353, - "step": 15107 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2848228061643235e-07, - "loss": 0.4436, - "step": 15108 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.279048799928387e-07, - "loss": 0.5103, - "step": 15109 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2732786016358665e-07, - "loss": 0.3957, - "step": 15110 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2675122115163225e-07, - "loss": 0.3848, - "step": 15111 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.261749629799128e-07, - "loss": 0.384, - "step": 15112 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.255990856713554e-07, - "loss": 0.3398, - "step": 15113 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2502358924886746e-07, - "loss": 0.4074, - "step": 15114 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.244484737353427e-07, - "loss": 0.4391, - "step": 15115 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.238737391536651e-07, - "loss": 0.4365, - "step": 15116 - }, - { - "epoch": 2.73, - "grad_norm": 0.0, - "learning_rate": 4.2329938552669205e-07, - "loss": 0.4365, - "step": 15117 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.227254128772762e-07, - "loss": 0.5293, - "step": 15118 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.221518212282505e-07, - "loss": 0.4771, - "step": 15119 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.215786106024311e-07, - "loss": 0.453, - "step": 15120 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.2100578102262534e-07, - "loss": 0.3893, - "step": 15121 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.204333325116183e-07, - "loss": 0.4214, - "step": 15122 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.198612650921852e-07, - "loss": 0.3797, - "step": 15123 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1928957878708323e-07, - "loss": 0.4954, - "step": 15124 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1871827361905203e-07, - "loss": 0.4371, - "step": 15125 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.181473496108224e-07, - "loss": 0.4784, - "step": 15126 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.17576806785106e-07, - "loss": 0.462, - "step": 15127 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.170066451646004e-07, - "loss": 0.4491, - "step": 15128 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.164368647719874e-07, - "loss": 0.4301, - "step": 15129 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.158674656299322e-07, - "loss": 0.3949, - "step": 15130 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1529844776108885e-07, - "loss": 0.3681, - "step": 15131 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.147298111880915e-07, - "loss": 0.3633, - "step": 15132 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.141615559335643e-07, - "loss": 0.4573, - "step": 15133 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.135936820201103e-07, - "loss": 0.359, - "step": 15134 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.130261894703236e-07, - "loss": 0.3663, - "step": 15135 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1245907830677614e-07, - "loss": 0.4575, - "step": 15136 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.118923485520321e-07, - "loss": 0.4915, - "step": 15137 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1132600022863676e-07, - "loss": 0.3761, - "step": 15138 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1076003335911997e-07, - "loss": 0.4135, - "step": 15139 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.1019444796599473e-07, - "loss": 0.4493, - "step": 15140 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0962924407176196e-07, - "loss": 0.3823, - "step": 15141 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0906442169890815e-07, - "loss": 0.4583, - "step": 15142 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0849998086990196e-07, - "loss": 0.5363, - "step": 15143 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0793592160719764e-07, - "loss": 0.4487, - "step": 15144 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.073722439332339e-07, - "loss": 0.4057, - "step": 15145 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.068089478704362e-07, - "loss": 0.4264, - "step": 15146 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0624603344121214e-07, - "loss": 0.4872, - "step": 15147 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0568350066795605e-07, - "loss": 0.5498, - "step": 15148 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0512134957304663e-07, - "loss": 0.3971, - "step": 15149 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.04559580178846e-07, - "loss": 0.4204, - "step": 15150 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.039981925077041e-07, - "loss": 0.4159, - "step": 15151 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0343718658195195e-07, - "loss": 0.435, - "step": 15152 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0287656242390837e-07, - "loss": 0.516, - "step": 15153 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.023163200558777e-07, - "loss": 0.4199, - "step": 15154 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.017564595001444e-07, - "loss": 0.4187, - "step": 15155 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.011969807789817e-07, - "loss": 0.415, - "step": 15156 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.006378839146463e-07, - "loss": 0.4214, - "step": 15157 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 4.0007916892938034e-07, - "loss": 0.4108, - "step": 15158 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.995208358454106e-07, - "loss": 0.3336, - "step": 15159 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9896288468494917e-07, - "loss": 0.3908, - "step": 15160 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9840531547019167e-07, - "loss": 0.4406, - "step": 15161 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.978481282233204e-07, - "loss": 0.4362, - "step": 15162 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9729132296649764e-07, - "loss": 0.4553, - "step": 15163 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9673489972187786e-07, - "loss": 0.439, - "step": 15164 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9617885851159554e-07, - "loss": 0.3832, - "step": 15165 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.956231993577697e-07, - "loss": 0.4579, - "step": 15166 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.95067922282506e-07, - "loss": 0.4616, - "step": 15167 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9451302730789454e-07, - "loss": 0.4385, - "step": 15168 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9395851445600985e-07, - "loss": 0.4778, - "step": 15169 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.934043837489121e-07, - "loss": 0.4268, - "step": 15170 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9285063520864476e-07, - "loss": 0.4908, - "step": 15171 - }, - { - "epoch": 2.74, - "grad_norm": 0.0, - "learning_rate": 3.9229726885723686e-07, - "loss": 0.3614, - "step": 15172 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.91744284716703e-07, - "loss": 0.4491, - "step": 15173 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.911916828090401e-07, - "loss": 0.4523, - "step": 15174 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.906394631562338e-07, - "loss": 0.4516, - "step": 15175 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.9008762578025105e-07, - "loss": 0.3955, - "step": 15176 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.895361707030476e-07, - "loss": 0.4237, - "step": 15177 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8898509794655704e-07, - "loss": 0.3664, - "step": 15178 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8843440753270176e-07, - "loss": 0.4709, - "step": 15179 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8788409948339323e-07, - "loss": 0.3748, - "step": 15180 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.873341738205216e-07, - "loss": 0.4293, - "step": 15181 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8678463056596506e-07, - "loss": 0.3833, - "step": 15182 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8623546974158266e-07, - "loss": 0.5404, - "step": 15183 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8568669136922364e-07, - "loss": 0.4437, - "step": 15184 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8513829547071725e-07, - "loss": 0.4131, - "step": 15185 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8459028206788153e-07, - "loss": 0.4263, - "step": 15186 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.840426511825157e-07, - "loss": 0.431, - "step": 15187 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.834954028364069e-07, - "loss": 0.4703, - "step": 15188 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.829485370513253e-07, - "loss": 0.4378, - "step": 15189 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.824020538490236e-07, - "loss": 0.4389, - "step": 15190 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.818559532512467e-07, - "loss": 0.4586, - "step": 15191 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.81310235279716e-07, - "loss": 0.4083, - "step": 15192 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.80764899956142e-07, - "loss": 0.4584, - "step": 15193 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.8021994730221947e-07, - "loss": 0.4012, - "step": 15194 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.7967537733962446e-07, - "loss": 0.5165, - "step": 15195 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.791311900900252e-07, - "loss": 0.455, - "step": 15196 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.785873855750688e-07, - "loss": 0.4395, - "step": 15197 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.78043963816388e-07, - "loss": 0.3378, - "step": 15198 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.775009248356032e-07, - "loss": 0.431, - "step": 15199 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.7695826865431387e-07, - "loss": 0.4679, - "step": 15200 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.7641599529411155e-07, - "loss": 0.3767, - "step": 15201 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.7587410477656574e-07, - "loss": 0.4661, - "step": 15202 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.753325971232358e-07, - "loss": 0.385, - "step": 15203 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.747914723556623e-07, - "loss": 0.4492, - "step": 15204 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.742507304953735e-07, - "loss": 0.4837, - "step": 15205 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.737103715638812e-07, - "loss": 0.3971, - "step": 15206 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.731703955826804e-07, - "loss": 0.4351, - "step": 15207 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.726308025732528e-07, - "loss": 0.3561, - "step": 15208 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.720915925570645e-07, - "loss": 0.4656, - "step": 15209 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.715527655555662e-07, - "loss": 0.4564, - "step": 15210 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.710143215901929e-07, - "loss": 0.3756, - "step": 15211 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.704762606823653e-07, - "loss": 0.4234, - "step": 15212 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.699385828534885e-07, - "loss": 0.4452, - "step": 15213 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.69401288124952e-07, - "loss": 0.5129, - "step": 15214 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.688643765181321e-07, - "loss": 0.4308, - "step": 15215 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6832784805438507e-07, - "loss": 0.4749, - "step": 15216 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.677917027550548e-07, - "loss": 0.5078, - "step": 15217 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.672559406414733e-07, - "loss": 0.4162, - "step": 15218 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.667205617349512e-07, - "loss": 0.4974, - "step": 15219 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6618556605678925e-07, - "loss": 0.4333, - "step": 15220 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6565095362826817e-07, - "loss": 0.3799, - "step": 15221 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.651167244706566e-07, - "loss": 0.4419, - "step": 15222 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.645828786052075e-07, - "loss": 0.4638, - "step": 15223 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6404941605315823e-07, - "loss": 0.5317, - "step": 15224 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6351633683572973e-07, - "loss": 0.4168, - "step": 15225 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.629836409741294e-07, - "loss": 0.4326, - "step": 15226 - }, - { - "epoch": 2.75, - "grad_norm": 0.0, - "learning_rate": 3.6245132848955033e-07, - "loss": 0.3787, - "step": 15227 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.6191939940316555e-07, - "loss": 0.4103, - "step": 15228 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.6138785373613815e-07, - "loss": 0.3626, - "step": 15229 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.6085669150961677e-07, - "loss": 0.5487, - "step": 15230 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.603259127447267e-07, - "loss": 0.4482, - "step": 15231 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5979551746258557e-07, - "loss": 0.3929, - "step": 15232 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.59265505684292e-07, - "loss": 0.5225, - "step": 15233 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5873587743093354e-07, - "loss": 0.4218, - "step": 15234 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5820663272357894e-07, - "loss": 0.3786, - "step": 15235 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.576777715832813e-07, - "loss": 0.3946, - "step": 15236 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.571492940310806e-07, - "loss": 0.375, - "step": 15237 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.566212000879987e-07, - "loss": 0.3715, - "step": 15238 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5609348977504675e-07, - "loss": 0.4512, - "step": 15239 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.555661631132157e-07, - "loss": 0.4877, - "step": 15240 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5503922012348534e-07, - "loss": 0.4826, - "step": 15241 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.545126608268168e-07, - "loss": 0.445, - "step": 15242 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.539864852441588e-07, - "loss": 0.4763, - "step": 15243 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.534606933964435e-07, - "loss": 0.4998, - "step": 15244 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.529352853045864e-07, - "loss": 0.43, - "step": 15245 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.524102609894897e-07, - "loss": 0.4547, - "step": 15246 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.518856204720411e-07, - "loss": 0.4376, - "step": 15247 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.513613637731095e-07, - "loss": 0.394, - "step": 15248 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.5083749091355255e-07, - "loss": 0.4709, - "step": 15249 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.503140019142093e-07, - "loss": 0.4231, - "step": 15250 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.497908967959063e-07, - "loss": 0.4278, - "step": 15251 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.492681755794536e-07, - "loss": 0.4643, - "step": 15252 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.487458382856468e-07, - "loss": 0.3536, - "step": 15253 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.482238849352626e-07, - "loss": 0.4165, - "step": 15254 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4770231554906555e-07, - "loss": 0.4439, - "step": 15255 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4718113014780676e-07, - "loss": 0.4082, - "step": 15256 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4666032875221966e-07, - "loss": 0.4495, - "step": 15257 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4613991138302104e-07, - "loss": 0.3054, - "step": 15258 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4561987806091435e-07, - "loss": 0.3634, - "step": 15259 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4510022880658857e-07, - "loss": 0.4993, - "step": 15260 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.445809636407149e-07, - "loss": 0.5027, - "step": 15261 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4406208258395135e-07, - "loss": 0.4198, - "step": 15262 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.435435856569391e-07, - "loss": 0.3967, - "step": 15263 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.430254728803062e-07, - "loss": 0.4482, - "step": 15264 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.425077442746627e-07, - "loss": 0.4964, - "step": 15265 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4199039986060556e-07, - "loss": 0.4145, - "step": 15266 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4147343965871495e-07, - "loss": 0.4076, - "step": 15267 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.4095686368955883e-07, - "loss": 0.3894, - "step": 15268 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.404406719736841e-07, - "loss": 0.3568, - "step": 15269 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3992486453162774e-07, - "loss": 0.4446, - "step": 15270 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3940944138390665e-07, - "loss": 0.4016, - "step": 15271 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.388944025510299e-07, - "loss": 0.4558, - "step": 15272 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.383797480534834e-07, - "loss": 0.4147, - "step": 15273 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3786547791174187e-07, - "loss": 0.3417, - "step": 15274 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.373515921462645e-07, - "loss": 0.4284, - "step": 15275 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3683809077749265e-07, - "loss": 0.3974, - "step": 15276 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.363249738258556e-07, - "loss": 0.4909, - "step": 15277 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3581224131176706e-07, - "loss": 0.3661, - "step": 15278 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3529989325562176e-07, - "loss": 0.4512, - "step": 15279 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.347879296778045e-07, - "loss": 0.4536, - "step": 15280 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3427635059868013e-07, - "loss": 0.4162, - "step": 15281 - }, - { - "epoch": 2.76, - "grad_norm": 0.0, - "learning_rate": 3.3376515603860126e-07, - "loss": 0.426, - "step": 15282 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.332543460179027e-07, - "loss": 0.3825, - "step": 15283 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.327439205569072e-07, - "loss": 0.4341, - "step": 15284 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.322338796759195e-07, - "loss": 0.4667, - "step": 15285 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.3172422339523005e-07, - "loss": 0.4946, - "step": 15286 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.3121495173511266e-07, - "loss": 0.4647, - "step": 15287 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.307060647158278e-07, - "loss": 0.439, - "step": 15288 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.3019756235762147e-07, - "loss": 0.4551, - "step": 15289 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.296894446807219e-07, - "loss": 0.41, - "step": 15290 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2918171170534085e-07, - "loss": 0.4583, - "step": 15291 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2867436345167867e-07, - "loss": 0.3962, - "step": 15292 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.281673999399171e-07, - "loss": 0.3525, - "step": 15293 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.276608211902266e-07, - "loss": 0.374, - "step": 15294 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2715462722275883e-07, - "loss": 0.3752, - "step": 15295 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2664881805764883e-07, - "loss": 0.4385, - "step": 15296 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2614339371502155e-07, - "loss": 0.4902, - "step": 15297 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2563835421498216e-07, - "loss": 0.3884, - "step": 15298 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.251336995776222e-07, - "loss": 0.5358, - "step": 15299 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.246294298230168e-07, - "loss": 0.4798, - "step": 15300 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2412554497122774e-07, - "loss": 0.4289, - "step": 15301 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.236220450423011e-07, - "loss": 0.5311, - "step": 15302 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2311893005626536e-07, - "loss": 0.394, - "step": 15303 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2261620003313455e-07, - "loss": 0.4162, - "step": 15304 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2211385499291147e-07, - "loss": 0.4818, - "step": 15305 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.216118949555791e-07, - "loss": 0.3983, - "step": 15306 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2111031994110473e-07, - "loss": 0.4519, - "step": 15307 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.2060912996944247e-07, - "loss": 0.4874, - "step": 15308 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.201083250605297e-07, - "loss": 0.4661, - "step": 15309 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1960790523429044e-07, - "loss": 0.4985, - "step": 15310 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.191078705106332e-07, - "loss": 0.4469, - "step": 15311 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1860822090944876e-07, - "loss": 0.4761, - "step": 15312 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1810895645061566e-07, - "loss": 0.4599, - "step": 15313 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.176100771539925e-07, - "loss": 0.4003, - "step": 15314 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.171115830394278e-07, - "loss": 0.4188, - "step": 15315 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1661347412675237e-07, - "loss": 0.5102, - "step": 15316 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1611575043578145e-07, - "loss": 0.4522, - "step": 15317 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1561841198631594e-07, - "loss": 0.3785, - "step": 15318 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1512145879813996e-07, - "loss": 0.3886, - "step": 15319 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.146248908910221e-07, - "loss": 0.4844, - "step": 15320 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1412870828471995e-07, - "loss": 0.4841, - "step": 15321 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1363291099896996e-07, - "loss": 0.3749, - "step": 15322 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.131374990534963e-07, - "loss": 0.4922, - "step": 15323 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.126424724680077e-07, - "loss": 0.4419, - "step": 15324 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1214783126219504e-07, - "loss": 0.4564, - "step": 15325 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1165357545573926e-07, - "loss": 0.4225, - "step": 15326 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.111597050683024e-07, - "loss": 0.4644, - "step": 15327 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.1066622011952986e-07, - "loss": 0.431, - "step": 15328 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.101731206290537e-07, - "loss": 0.5555, - "step": 15329 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0968040661648937e-07, - "loss": 0.421, - "step": 15330 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0918807810144e-07, - "loss": 0.3964, - "step": 15331 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0869613510348896e-07, - "loss": 0.4139, - "step": 15332 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0820457764220937e-07, - "loss": 0.3974, - "step": 15333 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0771340573715336e-07, - "loss": 0.3639, - "step": 15334 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0722261940786314e-07, - "loss": 0.4067, - "step": 15335 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.067322186738608e-07, - "loss": 0.3621, - "step": 15336 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.062422035546564e-07, - "loss": 0.5013, - "step": 15337 - }, - { - "epoch": 2.77, - "grad_norm": 0.0, - "learning_rate": 3.0575257406974423e-07, - "loss": 0.4081, - "step": 15338 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.05263330238601e-07, - "loss": 0.3729, - "step": 15339 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0477447208069113e-07, - "loss": 0.3872, - "step": 15340 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0428599961546126e-07, - "loss": 0.4647, - "step": 15341 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.037979128623425e-07, - "loss": 0.4418, - "step": 15342 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0331021184075607e-07, - "loss": 0.3292, - "step": 15343 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.028228965700997e-07, - "loss": 0.3587, - "step": 15344 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0233596706976007e-07, - "loss": 0.3979, - "step": 15345 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.018494233591096e-07, - "loss": 0.4782, - "step": 15346 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0136326545750047e-07, - "loss": 0.3646, - "step": 15347 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.008774933842773e-07, - "loss": 0.3331, - "step": 15348 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 3.0039210715876343e-07, - "loss": 0.4164, - "step": 15349 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.999071068002679e-07, - "loss": 0.4234, - "step": 15350 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.994224923280853e-07, - "loss": 0.4491, - "step": 15351 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9893826376149125e-07, - "loss": 0.4325, - "step": 15352 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9845442111975376e-07, - "loss": 0.4909, - "step": 15353 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.979709644221196e-07, - "loss": 0.3777, - "step": 15354 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9748789368782006e-07, - "loss": 0.4228, - "step": 15355 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.970052089360731e-07, - "loss": 0.4661, - "step": 15356 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9652291018608226e-07, - "loss": 0.5328, - "step": 15357 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.960409974570311e-07, - "loss": 0.4217, - "step": 15358 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.955594707680953e-07, - "loss": 0.4044, - "step": 15359 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9507833013842747e-07, - "loss": 0.5072, - "step": 15360 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9459757558716885e-07, - "loss": 0.5282, - "step": 15361 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9411720713344525e-07, - "loss": 0.4737, - "step": 15362 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9363722479636483e-07, - "loss": 0.4129, - "step": 15363 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.931576285950244e-07, - "loss": 0.3003, - "step": 15364 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.926784185485032e-07, - "loss": 0.4085, - "step": 15365 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.921995946758638e-07, - "loss": 0.3763, - "step": 15366 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9172115699615535e-07, - "loss": 0.4322, - "step": 15367 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.9124310552840816e-07, - "loss": 0.4853, - "step": 15368 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.907654402916438e-07, - "loss": 0.5158, - "step": 15369 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.902881613048636e-07, - "loss": 0.4861, - "step": 15370 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8981126858705357e-07, - "loss": 0.4116, - "step": 15371 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8933476215718515e-07, - "loss": 0.4505, - "step": 15372 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.888586420342154e-07, - "loss": 0.4693, - "step": 15373 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.883829082370859e-07, - "loss": 0.4592, - "step": 15374 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8790756078472036e-07, - "loss": 0.4148, - "step": 15375 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.874325996960303e-07, - "loss": 0.4098, - "step": 15376 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8695802498990957e-07, - "loss": 0.4489, - "step": 15377 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8648383668523737e-07, - "loss": 0.4665, - "step": 15378 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8601003480087984e-07, - "loss": 0.5611, - "step": 15379 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.855366193556819e-07, - "loss": 0.4359, - "step": 15380 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8506359036848176e-07, - "loss": 0.4396, - "step": 15381 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8459094785809327e-07, - "loss": 0.5285, - "step": 15382 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.841186918433203e-07, - "loss": 0.4638, - "step": 15383 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8364682234295e-07, - "loss": 0.3994, - "step": 15384 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.8317533937575303e-07, - "loss": 0.4179, - "step": 15385 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.827042429604876e-07, - "loss": 0.4322, - "step": 15386 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.822335331158954e-07, - "loss": 0.4554, - "step": 15387 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.817632098606993e-07, - "loss": 0.3604, - "step": 15388 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.81293273213612e-07, - "loss": 0.4426, - "step": 15389 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.808237231933264e-07, - "loss": 0.39, - "step": 15390 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.803545598185231e-07, - "loss": 0.4393, - "step": 15391 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.7988578310786605e-07, - "loss": 0.3807, - "step": 15392 - }, - { - "epoch": 2.78, - "grad_norm": 0.0, - "learning_rate": 2.794173930800048e-07, - "loss": 0.4596, - "step": 15393 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.789493897535711e-07, - "loss": 0.456, - "step": 15394 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.784817731471845e-07, - "loss": 0.3658, - "step": 15395 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7801454327944676e-07, - "loss": 0.4236, - "step": 15396 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.775477001689442e-07, - "loss": 0.4536, - "step": 15397 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7708124383425074e-07, - "loss": 0.3907, - "step": 15398 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.766151742939216e-07, - "loss": 0.4659, - "step": 15399 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.761494915664975e-07, - "loss": 0.4318, - "step": 15400 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.756841956705036e-07, - "loss": 0.3804, - "step": 15401 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7521928662445186e-07, - "loss": 0.3959, - "step": 15402 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7475476444683626e-07, - "loss": 0.4916, - "step": 15403 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7429062915613756e-07, - "loss": 0.4141, - "step": 15404 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7382688077081774e-07, - "loss": 0.4668, - "step": 15405 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.733635193093265e-07, - "loss": 0.345, - "step": 15406 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7290054479009675e-07, - "loss": 0.3952, - "step": 15407 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.724379572315472e-07, - "loss": 0.3575, - "step": 15408 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.719757566520798e-07, - "loss": 0.4316, - "step": 15409 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7151394307008195e-07, - "loss": 0.391, - "step": 15410 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7105251650392574e-07, - "loss": 0.461, - "step": 15411 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.7059147697196644e-07, - "loss": 0.4571, - "step": 15412 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.701308244925449e-07, - "loss": 0.5066, - "step": 15413 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6967055908398874e-07, - "loss": 0.4376, - "step": 15414 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.692106807646067e-07, - "loss": 0.4025, - "step": 15415 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6875118955269286e-07, - "loss": 0.3948, - "step": 15416 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.682920854665261e-07, - "loss": 0.4557, - "step": 15417 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.678333685243728e-07, - "loss": 0.5364, - "step": 15418 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6737503874448066e-07, - "loss": 0.4476, - "step": 15419 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.669170961450829e-07, - "loss": 0.4548, - "step": 15420 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6645954074439483e-07, - "loss": 0.3719, - "step": 15421 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.660023725606209e-07, - "loss": 0.4366, - "step": 15422 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.655455916119487e-07, - "loss": 0.4169, - "step": 15423 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.650891979165493e-07, - "loss": 0.5643, - "step": 15424 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.646331914925782e-07, - "loss": 0.3516, - "step": 15425 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.641775723581763e-07, - "loss": 0.4905, - "step": 15426 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.637223405314682e-07, - "loss": 0.3307, - "step": 15427 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.632674960305648e-07, - "loss": 0.4011, - "step": 15428 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6281303887356167e-07, - "loss": 0.4637, - "step": 15429 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.623589690785355e-07, - "loss": 0.4022, - "step": 15430 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6190528666355073e-07, - "loss": 0.44, - "step": 15431 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6145199164665626e-07, - "loss": 0.3814, - "step": 15432 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.6099908404588534e-07, - "loss": 0.4584, - "step": 15433 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.605465638792537e-07, - "loss": 0.513, - "step": 15434 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.600944311647646e-07, - "loss": 0.4817, - "step": 15435 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5964268592040486e-07, - "loss": 0.4078, - "step": 15436 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.591913281641445e-07, - "loss": 0.3601, - "step": 15437 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5874035791394026e-07, - "loss": 0.4771, - "step": 15438 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5828977518773225e-07, - "loss": 0.4225, - "step": 15439 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.57839580003445e-07, - "loss": 0.4415, - "step": 15440 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.573897723789898e-07, - "loss": 0.4002, - "step": 15441 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5694035233226e-07, - "loss": 0.4087, - "step": 15442 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.564913198811325e-07, - "loss": 0.4073, - "step": 15443 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.560426750434708e-07, - "loss": 0.4582, - "step": 15444 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.55594417837125e-07, - "loss": 0.5705, - "step": 15445 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5514654827992647e-07, - "loss": 0.4345, - "step": 15446 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.54699066389692e-07, - "loss": 0.5118, - "step": 15447 - }, - { - "epoch": 2.79, - "grad_norm": 0.0, - "learning_rate": 2.5425197218422294e-07, - "loss": 0.4797, - "step": 15448 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.5380526568130613e-07, - "loss": 0.5039, - "step": 15449 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.53358946898713e-07, - "loss": 0.4338, - "step": 15450 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.529130158541959e-07, - "loss": 0.3699, - "step": 15451 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.5246747256549854e-07, - "loss": 0.4381, - "step": 15452 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.520223170503422e-07, - "loss": 0.3855, - "step": 15453 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.515775493264383e-07, - "loss": 0.4409, - "step": 15454 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.5113316941147824e-07, - "loss": 0.3815, - "step": 15455 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.5068917732314234e-07, - "loss": 0.5015, - "step": 15456 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.502455730790931e-07, - "loss": 0.4522, - "step": 15457 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.498023566969765e-07, - "loss": 0.4934, - "step": 15458 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.49359528194425e-07, - "loss": 0.3915, - "step": 15459 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.489170875890545e-07, - "loss": 0.509, - "step": 15460 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.484750348984677e-07, - "loss": 0.4096, - "step": 15461 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.480333701402493e-07, - "loss": 0.4184, - "step": 15462 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4759209333196977e-07, - "loss": 0.4181, - "step": 15463 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4715120449118277e-07, - "loss": 0.4338, - "step": 15464 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.467107036354288e-07, - "loss": 0.3947, - "step": 15465 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.462705907822316e-07, - "loss": 0.4095, - "step": 15466 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4583086594909934e-07, - "loss": 0.4021, - "step": 15467 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.453915291535247e-07, - "loss": 0.4109, - "step": 15468 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4495258041298597e-07, - "loss": 0.3516, - "step": 15469 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.445140197449447e-07, - "loss": 0.4687, - "step": 15470 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4407584716684804e-07, - "loss": 0.502, - "step": 15471 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4363806269612655e-07, - "loss": 0.4231, - "step": 15472 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4320066635019734e-07, - "loss": 0.4026, - "step": 15473 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4276365814645874e-07, - "loss": 0.3748, - "step": 15474 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.4232703810229794e-07, - "loss": 0.398, - "step": 15475 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.418908062350833e-07, - "loss": 0.3722, - "step": 15476 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.414549625621676e-07, - "loss": 0.5024, - "step": 15477 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.410195071008914e-07, - "loss": 0.4491, - "step": 15478 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.405844398685775e-07, - "loss": 0.4474, - "step": 15479 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.401497608825343e-07, - "loss": 0.3527, - "step": 15480 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3971547016005345e-07, - "loss": 0.3911, - "step": 15481 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3928156771840906e-07, - "loss": 0.4214, - "step": 15482 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3884805357486604e-07, - "loss": 0.4693, - "step": 15483 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3841492774666964e-07, - "loss": 0.4082, - "step": 15484 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.379821902510504e-07, - "loss": 0.4699, - "step": 15485 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.375498411052224e-07, - "loss": 0.4794, - "step": 15486 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3711788032638517e-07, - "loss": 0.3913, - "step": 15487 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3668630793172386e-07, - "loss": 0.3705, - "step": 15488 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.36255123938407e-07, - "loss": 0.4516, - "step": 15489 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.358243283635875e-07, - "loss": 0.4648, - "step": 15490 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3539392122440275e-07, - "loss": 0.4275, - "step": 15491 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3496390253797575e-07, - "loss": 0.4358, - "step": 15492 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.345342723214128e-07, - "loss": 0.436, - "step": 15493 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.341050305918058e-07, - "loss": 0.4631, - "step": 15494 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3367617736623104e-07, - "loss": 0.4638, - "step": 15495 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.332477126617483e-07, - "loss": 0.4165, - "step": 15496 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3281963649540384e-07, - "loss": 0.4721, - "step": 15497 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.3239194888422413e-07, - "loss": 0.4168, - "step": 15498 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.319646498452266e-07, - "loss": 0.3637, - "step": 15499 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.315377393954099e-07, - "loss": 0.4179, - "step": 15500 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.311112175517549e-07, - "loss": 0.416, - "step": 15501 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.306850843312325e-07, - "loss": 0.425, - "step": 15502 - }, - { - "epoch": 2.8, - "grad_norm": 0.0, - "learning_rate": 2.302593397507913e-07, - "loss": 0.5059, - "step": 15503 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2983398382737e-07, - "loss": 0.4147, - "step": 15504 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.294090165778906e-07, - "loss": 0.4034, - "step": 15505 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2898443801925963e-07, - "loss": 0.5051, - "step": 15506 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2856024816836464e-07, - "loss": 0.4583, - "step": 15507 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2813644704208328e-07, - "loss": 0.452, - "step": 15508 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2771303465727312e-07, - "loss": 0.4763, - "step": 15509 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2729001103078184e-07, - "loss": 0.4088, - "step": 15510 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2686737617943377e-07, - "loss": 0.4414, - "step": 15511 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.264451301200432e-07, - "loss": 0.5238, - "step": 15512 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2602327286941007e-07, - "loss": 0.4239, - "step": 15513 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2560180444431202e-07, - "loss": 0.4537, - "step": 15514 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.251807248615212e-07, - "loss": 0.4436, - "step": 15515 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2476003413778537e-07, - "loss": 0.4455, - "step": 15516 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2433973228984217e-07, - "loss": 0.5177, - "step": 15517 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2391981933441053e-07, - "loss": 0.457, - "step": 15518 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2350029528819594e-07, - "loss": 0.4478, - "step": 15519 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2308116016788729e-07, - "loss": 0.472, - "step": 15520 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2266241399015896e-07, - "loss": 0.3672, - "step": 15521 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2224405677166994e-07, - "loss": 0.3958, - "step": 15522 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.218260885290613e-07, - "loss": 0.3879, - "step": 15523 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2140850927896306e-07, - "loss": 0.3845, - "step": 15524 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2099131903798531e-07, - "loss": 0.4536, - "step": 15525 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.2057451782272588e-07, - "loss": 0.3614, - "step": 15526 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.201581056497648e-07, - "loss": 0.4247, - "step": 15527 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1974208253566775e-07, - "loss": 0.4293, - "step": 15528 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.193264484969859e-07, - "loss": 0.4587, - "step": 15529 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1891120355025276e-07, - "loss": 0.5015, - "step": 15530 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1849634771198724e-07, - "loss": 0.4392, - "step": 15531 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1808188099869398e-07, - "loss": 0.3782, - "step": 15532 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.17667803426862e-07, - "loss": 0.4541, - "step": 15533 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1725411501296145e-07, - "loss": 0.4893, - "step": 15534 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.168408157734525e-07, - "loss": 0.4346, - "step": 15535 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1642790572477312e-07, - "loss": 0.4212, - "step": 15536 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.160153848833535e-07, - "loss": 0.4794, - "step": 15537 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.156032532656016e-07, - "loss": 0.4306, - "step": 15538 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1519151088791545e-07, - "loss": 0.3858, - "step": 15539 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1478015776667303e-07, - "loss": 0.3834, - "step": 15540 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1436919391823797e-07, - "loss": 0.441, - "step": 15541 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1395861935896045e-07, - "loss": 0.4103, - "step": 15542 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.13548434105173e-07, - "loss": 0.4597, - "step": 15543 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.131386381731948e-07, - "loss": 0.43, - "step": 15544 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1272923157932722e-07, - "loss": 0.4745, - "step": 15545 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1232021433985727e-07, - "loss": 0.481, - "step": 15546 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1191158647105525e-07, - "loss": 0.4835, - "step": 15547 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1150334798918037e-07, - "loss": 0.4792, - "step": 15548 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.1109549891047077e-07, - "loss": 0.4835, - "step": 15549 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.106880392511501e-07, - "loss": 0.4265, - "step": 15550 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.102809690274299e-07, - "loss": 0.4115, - "step": 15551 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.098742882555027e-07, - "loss": 0.4873, - "step": 15552 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0946799695154897e-07, - "loss": 0.3959, - "step": 15553 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0906209513173014e-07, - "loss": 0.4506, - "step": 15554 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0865658281219446e-07, - "loss": 0.4871, - "step": 15555 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0825146000907236e-07, - "loss": 0.4508, - "step": 15556 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0784672673848094e-07, - "loss": 0.4076, - "step": 15557 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0744238301652286e-07, - "loss": 0.4287, - "step": 15558 - }, - { - "epoch": 2.81, - "grad_norm": 0.0, - "learning_rate": 2.0703842885928194e-07, - "loss": 0.3638, - "step": 15559 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0663486428282865e-07, - "loss": 0.3966, - "step": 15560 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.062316893032168e-07, - "loss": 0.3689, - "step": 15561 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.058289039364869e-07, - "loss": 0.4423, - "step": 15562 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0542650819866172e-07, - "loss": 0.4318, - "step": 15563 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0502450210574843e-07, - "loss": 0.3812, - "step": 15564 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0462288567373977e-07, - "loss": 0.4208, - "step": 15565 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0422165891861413e-07, - "loss": 0.3807, - "step": 15566 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0382082185633201e-07, - "loss": 0.3734, - "step": 15567 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0342037450283847e-07, - "loss": 0.5026, - "step": 15568 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.030203168740641e-07, - "loss": 0.4556, - "step": 15569 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0262064898592615e-07, - "loss": 0.3767, - "step": 15570 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0222137085432414e-07, - "loss": 0.4292, - "step": 15571 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.018224824951387e-07, - "loss": 0.466, - "step": 15572 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0142398392423935e-07, - "loss": 0.4635, - "step": 15573 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0102587515748006e-07, - "loss": 0.4702, - "step": 15574 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.006281562106993e-07, - "loss": 0.3401, - "step": 15575 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 2.0023082709971663e-07, - "loss": 0.3319, - "step": 15576 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9983388784034052e-07, - "loss": 0.4657, - "step": 15577 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9943733844836056e-07, - "loss": 0.4744, - "step": 15578 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.990411789395519e-07, - "loss": 0.4734, - "step": 15579 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9864540932967636e-07, - "loss": 0.4084, - "step": 15580 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9825002963447582e-07, - "loss": 0.4561, - "step": 15581 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9785503986968102e-07, - "loss": 0.4594, - "step": 15582 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9746044005100497e-07, - "loss": 0.389, - "step": 15583 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9706623019414505e-07, - "loss": 0.4492, - "step": 15584 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9667241031478323e-07, - "loss": 0.3936, - "step": 15585 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9627898042858807e-07, - "loss": 0.4475, - "step": 15586 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9588594055121035e-07, - "loss": 0.4225, - "step": 15587 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9549329069828426e-07, - "loss": 0.4294, - "step": 15588 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9510103088543064e-07, - "loss": 0.4075, - "step": 15589 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9470916112825477e-07, - "loss": 0.4047, - "step": 15590 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9431768144234643e-07, - "loss": 0.3987, - "step": 15591 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9392659184327867e-07, - "loss": 0.3633, - "step": 15592 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.935358923466102e-07, - "loss": 0.435, - "step": 15593 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.93145582967883e-07, - "loss": 0.4182, - "step": 15594 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9275566372262356e-07, - "loss": 0.5191, - "step": 15595 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9236613462634614e-07, - "loss": 0.3455, - "step": 15596 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9197699569454497e-07, - "loss": 0.4023, - "step": 15597 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9158824694269995e-07, - "loss": 0.3789, - "step": 15598 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9119988838627866e-07, - "loss": 0.4497, - "step": 15599 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.908119200407288e-07, - "loss": 0.4473, - "step": 15600 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9042434192148352e-07, - "loss": 0.45, - "step": 15601 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.9003715404396384e-07, - "loss": 0.3632, - "step": 15602 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8965035642357188e-07, - "loss": 0.4841, - "step": 15603 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8926394907569422e-07, - "loss": 0.4267, - "step": 15604 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8887793201570415e-07, - "loss": 0.5343, - "step": 15605 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.884923052589549e-07, - "loss": 0.4506, - "step": 15606 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8810706882079199e-07, - "loss": 0.4502, - "step": 15607 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8772222271653874e-07, - "loss": 0.407, - "step": 15608 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.873377669615051e-07, - "loss": 0.4727, - "step": 15609 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8695370157098435e-07, - "loss": 0.4091, - "step": 15610 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8657002656025435e-07, - "loss": 0.4452, - "step": 15611 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8618674194458175e-07, - "loss": 0.4204, - "step": 15612 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8580384773921324e-07, - "loss": 0.4082, - "step": 15613 - }, - { - "epoch": 2.82, - "grad_norm": 0.0, - "learning_rate": 1.8542134395937882e-07, - "loss": 0.4072, - "step": 15614 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8503923062029748e-07, - "loss": 0.4851, - "step": 15615 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8465750773716928e-07, - "loss": 0.5209, - "step": 15616 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8427617532518093e-07, - "loss": 0.4353, - "step": 15617 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8389523339950033e-07, - "loss": 0.4299, - "step": 15618 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.835146819752842e-07, - "loss": 0.3981, - "step": 15619 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8313452106766937e-07, - "loss": 0.4391, - "step": 15620 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.827547506917815e-07, - "loss": 0.4399, - "step": 15621 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.823753708627274e-07, - "loss": 0.4448, - "step": 15622 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.819963815955983e-07, - "loss": 0.4732, - "step": 15623 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.816177829054744e-07, - "loss": 0.4126, - "step": 15624 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8123957480741472e-07, - "loss": 0.4229, - "step": 15625 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8086175731646394e-07, - "loss": 0.4539, - "step": 15626 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8048433044765445e-07, - "loss": 0.4662, - "step": 15627 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.8010729421599872e-07, - "loss": 0.4295, - "step": 15628 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7973064863649804e-07, - "loss": 0.3995, - "step": 15629 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7935439372413599e-07, - "loss": 0.5065, - "step": 15630 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.789785294938806e-07, - "loss": 0.442, - "step": 15631 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7860305596068218e-07, - "loss": 0.3925, - "step": 15632 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.782279731394787e-07, - "loss": 0.4853, - "step": 15633 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7785328104519274e-07, - "loss": 0.3835, - "step": 15634 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7747897969272898e-07, - "loss": 0.3959, - "step": 15635 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7710506909697888e-07, - "loss": 0.4093, - "step": 15636 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7673154927281722e-07, - "loss": 0.3906, - "step": 15637 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7635842023510208e-07, - "loss": 0.4257, - "step": 15638 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7598568199867716e-07, - "loss": 0.4003, - "step": 15639 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7561333457837283e-07, - "loss": 0.382, - "step": 15640 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7524137798899942e-07, - "loss": 0.4471, - "step": 15641 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7486981224535403e-07, - "loss": 0.4279, - "step": 15642 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7449863736221928e-07, - "loss": 0.4525, - "step": 15643 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.741278533543611e-07, - "loss": 0.453, - "step": 15644 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7375746023652883e-07, - "loss": 0.4509, - "step": 15645 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7338745802345957e-07, - "loss": 0.4542, - "step": 15646 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7301784672987044e-07, - "loss": 0.3889, - "step": 15647 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7264862637046632e-07, - "loss": 0.4122, - "step": 15648 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7227979695993323e-07, - "loss": 0.4689, - "step": 15649 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7191135851294726e-07, - "loss": 0.5205, - "step": 15650 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7154331104416332e-07, - "loss": 0.5187, - "step": 15651 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7117565456822304e-07, - "loss": 0.4123, - "step": 15652 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.708083890997547e-07, - "loss": 0.4763, - "step": 15653 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7044151465336556e-07, - "loss": 0.4097, - "step": 15654 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.7007503124365165e-07, - "loss": 0.4249, - "step": 15655 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6970893888519357e-07, - "loss": 0.4197, - "step": 15656 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6934323759255412e-07, - "loss": 0.4083, - "step": 15657 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6897792738028163e-07, - "loss": 0.4316, - "step": 15658 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6861300826290784e-07, - "loss": 0.4215, - "step": 15659 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.682484802549511e-07, - "loss": 0.4766, - "step": 15660 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6788434337091207e-07, - "loss": 0.4793, - "step": 15661 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6752059762527806e-07, - "loss": 0.4246, - "step": 15662 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6715724303251857e-07, - "loss": 0.4616, - "step": 15663 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.667942796070876e-07, - "loss": 0.5006, - "step": 15664 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6643170736342585e-07, - "loss": 0.4129, - "step": 15665 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6606952631595619e-07, - "loss": 0.505, - "step": 15666 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6570773647908823e-07, - "loss": 0.4502, - "step": 15667 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6534633786721267e-07, - "loss": 0.4333, - "step": 15668 - }, - { - "epoch": 2.83, - "grad_norm": 0.0, - "learning_rate": 1.6498533049470911e-07, - "loss": 0.4194, - "step": 15669 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6462471437593607e-07, - "loss": 0.3391, - "step": 15670 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.64264489525241e-07, - "loss": 0.3983, - "step": 15671 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6390465595695348e-07, - "loss": 0.4099, - "step": 15672 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.635452136853899e-07, - "loss": 0.4177, - "step": 15673 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6318616272484767e-07, - "loss": 0.5418, - "step": 15674 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6282750308961204e-07, - "loss": 0.4377, - "step": 15675 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.624692347939505e-07, - "loss": 0.5407, - "step": 15676 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6211135785211495e-07, - "loss": 0.3783, - "step": 15677 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6175387227834293e-07, - "loss": 0.4305, - "step": 15678 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6139677808685528e-07, - "loss": 0.515, - "step": 15679 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.610400752918595e-07, - "loss": 0.3943, - "step": 15680 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6068376390754315e-07, - "loss": 0.4155, - "step": 15681 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.6032784394808265e-07, - "loss": 0.421, - "step": 15682 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5997231542763668e-07, - "loss": 0.494, - "step": 15683 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.596171783603506e-07, - "loss": 0.4295, - "step": 15684 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.592624327603498e-07, - "loss": 0.4643, - "step": 15685 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5890807864174628e-07, - "loss": 0.4543, - "step": 15686 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.585541160186388e-07, - "loss": 0.4693, - "step": 15687 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5820054490510827e-07, - "loss": 0.3525, - "step": 15688 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5784736531521905e-07, - "loss": 0.4737, - "step": 15689 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.574945772630232e-07, - "loss": 0.395, - "step": 15690 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5714218076255394e-07, - "loss": 0.37, - "step": 15691 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5679017582783118e-07, - "loss": 0.425, - "step": 15692 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5643856247285706e-07, - "loss": 0.4794, - "step": 15693 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5608734071161925e-07, - "loss": 0.4252, - "step": 15694 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.55736510558091e-07, - "loss": 0.401, - "step": 15695 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5538607202622902e-07, - "loss": 0.5176, - "step": 15696 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5503602512997318e-07, - "loss": 0.4798, - "step": 15697 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5468636988324903e-07, - "loss": 0.5082, - "step": 15698 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5433710629996878e-07, - "loss": 0.41, - "step": 15699 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5398823439402466e-07, - "loss": 0.3795, - "step": 15700 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5363975417929445e-07, - "loss": 0.4993, - "step": 15701 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.532916656696437e-07, - "loss": 0.468, - "step": 15702 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5294396887891806e-07, - "loss": 0.4115, - "step": 15703 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5259666382095084e-07, - "loss": 0.442, - "step": 15704 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.522497505095577e-07, - "loss": 0.3978, - "step": 15705 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.519032289585387e-07, - "loss": 0.4133, - "step": 15706 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.515570991816817e-07, - "loss": 0.4402, - "step": 15707 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5121136119275458e-07, - "loss": 0.3743, - "step": 15708 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5086601500550968e-07, - "loss": 0.4013, - "step": 15709 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.505210606336882e-07, - "loss": 0.4333, - "step": 15710 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.5017649809101143e-07, - "loss": 0.4089, - "step": 15711 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4983232739118836e-07, - "loss": 0.4237, - "step": 15712 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4948854854790916e-07, - "loss": 0.3983, - "step": 15713 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.491451615748496e-07, - "loss": 0.4487, - "step": 15714 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4880216648567202e-07, - "loss": 0.4391, - "step": 15715 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4845956329402e-07, - "loss": 0.4222, - "step": 15716 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4811735201352262e-07, - "loss": 0.4151, - "step": 15717 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4777553265779455e-07, - "loss": 0.3395, - "step": 15718 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4743410524043378e-07, - "loss": 0.4456, - "step": 15719 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.470930697750217e-07, - "loss": 0.5194, - "step": 15720 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4675242627512742e-07, - "loss": 0.4893, - "step": 15721 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4641217475430126e-07, - "loss": 0.4447, - "step": 15722 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4607231522607901e-07, - "loss": 0.418, - "step": 15723 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4573284770397988e-07, - "loss": 0.4678, - "step": 15724 - }, - { - "epoch": 2.84, - "grad_norm": 0.0, - "learning_rate": 1.4539377220150975e-07, - "loss": 0.4627, - "step": 15725 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4505508873215667e-07, - "loss": 0.4305, - "step": 15726 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4471679730939548e-07, - "loss": 0.5028, - "step": 15727 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4437889794668314e-07, - "loss": 0.4876, - "step": 15728 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4404139065746115e-07, - "loss": 0.4713, - "step": 15729 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4370427545515765e-07, - "loss": 0.41, - "step": 15730 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4336755235318188e-07, - "loss": 0.3944, - "step": 15731 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4303122136493098e-07, - "loss": 0.4061, - "step": 15732 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4269528250378416e-07, - "loss": 0.3603, - "step": 15733 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4235973578310526e-07, - "loss": 0.4684, - "step": 15734 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.420245812162435e-07, - "loss": 0.4583, - "step": 15735 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4168981881652943e-07, - "loss": 0.3805, - "step": 15736 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4135544859728567e-07, - "loss": 0.4338, - "step": 15737 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4102147057180938e-07, - "loss": 0.4338, - "step": 15738 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.406878847533888e-07, - "loss": 0.3648, - "step": 15739 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4035469115529333e-07, - "loss": 0.4723, - "step": 15740 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.4002188979077903e-07, - "loss": 0.4124, - "step": 15741 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.396894806730853e-07, - "loss": 0.4854, - "step": 15742 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3935746381543603e-07, - "loss": 0.4796, - "step": 15743 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3902583923103842e-07, - "loss": 0.4611, - "step": 15744 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3869460693308635e-07, - "loss": 0.4215, - "step": 15745 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3836376693475483e-07, - "loss": 0.4871, - "step": 15746 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.380333192492078e-07, - "loss": 0.3972, - "step": 15747 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.377032638895892e-07, - "loss": 0.4728, - "step": 15748 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3737360086903074e-07, - "loss": 0.5168, - "step": 15749 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3704433020064522e-07, - "loss": 0.3725, - "step": 15750 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3671545189753333e-07, - "loss": 0.479, - "step": 15751 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3638696597277678e-07, - "loss": 0.4703, - "step": 15752 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3605887243944405e-07, - "loss": 0.4535, - "step": 15753 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.357311713105869e-07, - "loss": 0.4008, - "step": 15754 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3540386259924266e-07, - "loss": 0.4799, - "step": 15755 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3507694631843203e-07, - "loss": 0.444, - "step": 15756 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.347504224811591e-07, - "loss": 0.4171, - "step": 15757 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3442429110041455e-07, - "loss": 0.404, - "step": 15758 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3409855218917356e-07, - "loss": 0.446, - "step": 15759 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3377320576039354e-07, - "loss": 0.4713, - "step": 15760 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3344825182701637e-07, - "loss": 0.4923, - "step": 15761 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3312369040196947e-07, - "loss": 0.471, - "step": 15762 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3279952149816478e-07, - "loss": 0.4724, - "step": 15763 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.324757451284997e-07, - "loss": 0.4979, - "step": 15764 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3215236130585284e-07, - "loss": 0.4155, - "step": 15765 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.318293700430906e-07, - "loss": 0.4237, - "step": 15766 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3150677135306155e-07, - "loss": 0.4177, - "step": 15767 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3118456524859658e-07, - "loss": 0.4112, - "step": 15768 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.308627517425176e-07, - "loss": 0.4423, - "step": 15769 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.3054133084762442e-07, - "loss": 0.427, - "step": 15770 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.302203025767046e-07, - "loss": 0.4489, - "step": 15771 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2989966694252897e-07, - "loss": 0.383, - "step": 15772 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2957942395785294e-07, - "loss": 0.3675, - "step": 15773 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2925957363541518e-07, - "loss": 0.4627, - "step": 15774 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2894011598794332e-07, - "loss": 0.4189, - "step": 15775 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2862105102814272e-07, - "loss": 0.4739, - "step": 15776 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2830237876870765e-07, - "loss": 0.3957, - "step": 15777 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.279840992223147e-07, - "loss": 0.4233, - "step": 15778 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2766621240162591e-07, - "loss": 0.4663, - "step": 15779 - }, - { - "epoch": 2.85, - "grad_norm": 0.0, - "learning_rate": 1.2734871831928673e-07, - "loss": 0.3879, - "step": 15780 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.270316169879293e-07, - "loss": 0.4401, - "step": 15781 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2671490842016798e-07, - "loss": 0.4301, - "step": 15782 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2639859262860154e-07, - "loss": 0.4931, - "step": 15783 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2608266962581218e-07, - "loss": 0.4759, - "step": 15784 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.257671394243698e-07, - "loss": 0.4376, - "step": 15785 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2545200203682662e-07, - "loss": 0.4762, - "step": 15786 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2513725747571813e-07, - "loss": 0.4272, - "step": 15787 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.248229057535666e-07, - "loss": 0.3909, - "step": 15788 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.245089468828764e-07, - "loss": 0.3864, - "step": 15789 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.241953808761387e-07, - "loss": 0.4715, - "step": 15790 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.238822077458257e-07, - "loss": 0.4309, - "step": 15791 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.235694275043986e-07, - "loss": 0.4847, - "step": 15792 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2325704016429852e-07, - "loss": 0.4775, - "step": 15793 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2294504573795329e-07, - "loss": 0.419, - "step": 15794 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2263344423777412e-07, - "loss": 0.4226, - "step": 15795 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2232223567615663e-07, - "loss": 0.4187, - "step": 15796 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2201142006548317e-07, - "loss": 0.4522, - "step": 15797 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2170099741811714e-07, - "loss": 0.4125, - "step": 15798 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2139096774640868e-07, - "loss": 0.4559, - "step": 15799 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2108133106268905e-07, - "loss": 0.4589, - "step": 15800 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2077208737927837e-07, - "loss": 0.4286, - "step": 15801 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2046323670847794e-07, - "loss": 0.419, - "step": 15802 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.2015477906257456e-07, - "loss": 0.3607, - "step": 15803 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1984671445383845e-07, - "loss": 0.3783, - "step": 15804 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1953904289452756e-07, - "loss": 0.418, - "step": 15805 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1923176439687767e-07, - "loss": 0.4522, - "step": 15806 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1892487897311566e-07, - "loss": 0.4627, - "step": 15807 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1861838663544955e-07, - "loss": 0.3668, - "step": 15808 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1831228739607182e-07, - "loss": 0.4738, - "step": 15809 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1800658126715936e-07, - "loss": 0.5098, - "step": 15810 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1770126826087358e-07, - "loss": 0.3641, - "step": 15811 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1739634838936031e-07, - "loss": 0.4592, - "step": 15812 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1709182166475097e-07, - "loss": 0.4096, - "step": 15813 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1678768809915919e-07, - "loss": 0.4696, - "step": 15814 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1648394770468418e-07, - "loss": 0.4057, - "step": 15815 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1618060049340852e-07, - "loss": 0.4169, - "step": 15816 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.158776464774003e-07, - "loss": 0.4274, - "step": 15817 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1557508566871323e-07, - "loss": 0.4237, - "step": 15818 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1527291807938101e-07, - "loss": 0.3889, - "step": 15819 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1497114372142625e-07, - "loss": 0.4821, - "step": 15820 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.146697626068527e-07, - "loss": 0.4169, - "step": 15821 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1436877474765074e-07, - "loss": 0.5344, - "step": 15822 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1406818015579413e-07, - "loss": 0.3991, - "step": 15823 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1376797884324109e-07, - "loss": 0.4596, - "step": 15824 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1346817082193318e-07, - "loss": 0.436, - "step": 15825 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1316875610379862e-07, - "loss": 0.5059, - "step": 15826 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1286973470074791e-07, - "loss": 0.3963, - "step": 15827 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1257110662467597e-07, - "loss": 0.427, - "step": 15828 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1227287188746438e-07, - "loss": 0.4255, - "step": 15829 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1197503050097591e-07, - "loss": 0.3951, - "step": 15830 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1167758247706106e-07, - "loss": 0.4089, - "step": 15831 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1138052782755038e-07, - "loss": 0.3909, - "step": 15832 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1108386656426218e-07, - "loss": 0.4389, - "step": 15833 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1078759869899925e-07, - "loss": 0.4049, - "step": 15834 - }, - { - "epoch": 2.86, - "grad_norm": 0.0, - "learning_rate": 1.1049172424354659e-07, - "loss": 0.4139, - "step": 15835 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.1019624320967592e-07, - "loss": 0.5373, - "step": 15836 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0990115560914006e-07, - "loss": 0.467, - "step": 15837 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0960646145367737e-07, - "loss": 0.4814, - "step": 15838 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0931216075501516e-07, - "loss": 0.4665, - "step": 15839 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.090182535248574e-07, - "loss": 0.3722, - "step": 15840 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0872473977489806e-07, - "loss": 0.3691, - "step": 15841 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0843161951681336e-07, - "loss": 0.4036, - "step": 15842 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0813889276226397e-07, - "loss": 0.4715, - "step": 15843 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0784655952289614e-07, - "loss": 0.473, - "step": 15844 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0755461981033721e-07, - "loss": 0.4396, - "step": 15845 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0726307363620237e-07, - "loss": 0.3649, - "step": 15846 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0697192101209009e-07, - "loss": 0.4482, - "step": 15847 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0668116194958222e-07, - "loss": 0.477, - "step": 15848 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0639079646024508e-07, - "loss": 0.3684, - "step": 15849 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0610082455563165e-07, - "loss": 0.503, - "step": 15850 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0581124624727712e-07, - "loss": 0.4517, - "step": 15851 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0552206154670008e-07, - "loss": 0.4133, - "step": 15852 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0523327046540577e-07, - "loss": 0.4239, - "step": 15853 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0494487301488276e-07, - "loss": 0.4871, - "step": 15854 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.04656869206603e-07, - "loss": 0.5015, - "step": 15855 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0436925905202511e-07, - "loss": 0.4052, - "step": 15856 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0408204256259102e-07, - "loss": 0.4822, - "step": 15857 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0379521974972606e-07, - "loss": 0.4231, - "step": 15858 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0350879062483999e-07, - "loss": 0.3704, - "step": 15859 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.03222755199327e-07, - "loss": 0.4032, - "step": 15860 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0293711348456692e-07, - "loss": 0.4828, - "step": 15861 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0265186549192396e-07, - "loss": 0.4288, - "step": 15862 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0236701123274462e-07, - "loss": 0.4012, - "step": 15863 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0208255071836204e-07, - "loss": 0.3807, - "step": 15864 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0179848396009051e-07, - "loss": 0.4482, - "step": 15865 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.015148109692321e-07, - "loss": 0.4046, - "step": 15866 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0123153175707112e-07, - "loss": 0.437, - "step": 15867 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0094864633487855e-07, - "loss": 0.4389, - "step": 15868 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.006661547139054e-07, - "loss": 0.4036, - "step": 15869 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0038405690539266e-07, - "loss": 0.509, - "step": 15870 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 1.0010235292055914e-07, - "loss": 0.4289, - "step": 15871 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.982104277061477e-08, - "loss": 0.4763, - "step": 15872 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.954012646674948e-08, - "loss": 0.5121, - "step": 15873 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.925960402013879e-08, - "loss": 0.4418, - "step": 15874 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.897947544194154e-08, - "loss": 0.427, - "step": 15875 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.869974074330102e-08, - "loss": 0.3409, - "step": 15876 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.842039993534724e-08, - "loss": 0.4093, - "step": 15877 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.814145302919243e-08, - "loss": 0.4672, - "step": 15878 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.786290003593324e-08, - "loss": 0.3798, - "step": 15879 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.758474096665194e-08, - "loss": 0.4286, - "step": 15880 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.7306975832413e-08, - "loss": 0.389, - "step": 15881 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.702960464426648e-08, - "loss": 0.4693, - "step": 15882 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.675262741324798e-08, - "loss": 0.3409, - "step": 15883 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.647604415037426e-08, - "loss": 0.374, - "step": 15884 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.619985486664985e-08, - "loss": 0.3833, - "step": 15885 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.592405957306039e-08, - "loss": 0.4826, - "step": 15886 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.564865828057935e-08, - "loss": 0.4363, - "step": 15887 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.537365100016128e-08, - "loss": 0.4414, - "step": 15888 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.509903774274743e-08, - "loss": 0.3559, - "step": 15889 - }, - { - "epoch": 2.87, - "grad_norm": 0.0, - "learning_rate": 9.482481851926128e-08, - "loss": 0.3866, - "step": 15890 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.455099334061302e-08, - "loss": 0.4257, - "step": 15891 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.427756221769502e-08, - "loss": 0.4676, - "step": 15892 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.400452516138414e-08, - "loss": 0.537, - "step": 15893 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.373188218254392e-08, - "loss": 0.3916, - "step": 15894 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.345963329202012e-08, - "loss": 0.479, - "step": 15895 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.318777850064298e-08, - "loss": 0.4477, - "step": 15896 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.291631781922717e-08, - "loss": 0.4256, - "step": 15897 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.264525125857071e-08, - "loss": 0.5157, - "step": 15898 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.237457882945943e-08, - "loss": 0.4883, - "step": 15899 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.210430054266029e-08, - "loss": 0.4333, - "step": 15900 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.183441640892576e-08, - "loss": 0.3808, - "step": 15901 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.156492643899062e-08, - "loss": 0.3604, - "step": 15902 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.129583064357738e-08, - "loss": 0.4806, - "step": 15903 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.102712903339084e-08, - "loss": 0.4276, - "step": 15904 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.075882161911909e-08, - "loss": 0.5033, - "step": 15905 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.049090841143805e-08, - "loss": 0.4909, - "step": 15906 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 9.022338942100361e-08, - "loss": 0.3714, - "step": 15907 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.995626465846063e-08, - "loss": 0.4416, - "step": 15908 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.968953413443282e-08, - "loss": 0.4517, - "step": 15909 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.942319785953279e-08, - "loss": 0.3887, - "step": 15910 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.915725584435653e-08, - "loss": 0.4589, - "step": 15911 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.889170809948222e-08, - "loss": 0.4147, - "step": 15912 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.862655463547476e-08, - "loss": 0.3985, - "step": 15913 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.836179546288126e-08, - "loss": 0.4175, - "step": 15914 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.809743059223552e-08, - "loss": 0.4458, - "step": 15915 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.783346003405469e-08, - "loss": 0.4052, - "step": 15916 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.756988379883813e-08, - "loss": 0.4129, - "step": 15917 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.730670189707413e-08, - "loss": 0.4577, - "step": 15918 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.704391433922987e-08, - "loss": 0.3968, - "step": 15919 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.678152113576032e-08, - "loss": 0.4124, - "step": 15920 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.651952229710492e-08, - "loss": 0.3498, - "step": 15921 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.625791783368532e-08, - "loss": 0.3745, - "step": 15922 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.599670775590874e-08, - "loss": 0.372, - "step": 15923 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.573589207416688e-08, - "loss": 0.4153, - "step": 15924 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.547547079883477e-08, - "loss": 0.498, - "step": 15925 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.521544394027414e-08, - "loss": 0.4239, - "step": 15926 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.495581150882782e-08, - "loss": 0.4022, - "step": 15927 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.469657351482418e-08, - "loss": 0.4773, - "step": 15928 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.443772996857613e-08, - "loss": 0.4293, - "step": 15929 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.417928088038208e-08, - "loss": 0.4913, - "step": 15930 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.392122626052379e-08, - "loss": 0.3879, - "step": 15931 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.36635661192653e-08, - "loss": 0.4193, - "step": 15932 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.340630046685838e-08, - "loss": 0.4326, - "step": 15933 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.314942931353709e-08, - "loss": 0.5171, - "step": 15934 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.28929526695188e-08, - "loss": 0.3778, - "step": 15935 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.263687054500869e-08, - "loss": 0.4308, - "step": 15936 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.238118295019304e-08, - "loss": 0.5086, - "step": 15937 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.212588989524373e-08, - "loss": 0.4219, - "step": 15938 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.187099139031707e-08, - "loss": 0.47, - "step": 15939 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.161648744555272e-08, - "loss": 0.397, - "step": 15940 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.136237807107594e-08, - "loss": 0.4964, - "step": 15941 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.11086632769953e-08, - "loss": 0.4044, - "step": 15942 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.085534307340381e-08, - "loss": 0.4389, - "step": 15943 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.060241747038011e-08, - "loss": 0.4307, - "step": 15944 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.034988647798392e-08, - "loss": 0.4568, - "step": 15945 - }, - { - "epoch": 2.88, - "grad_norm": 0.0, - "learning_rate": 8.009775010626274e-08, - "loss": 0.4171, - "step": 15946 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.984600836524637e-08, - "loss": 0.5389, - "step": 15947 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.959466126495118e-08, - "loss": 0.4, - "step": 15948 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.934370881537478e-08, - "loss": 0.3916, - "step": 15949 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.909315102650028e-08, - "loss": 0.4111, - "step": 15950 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.884298790829526e-08, - "loss": 0.4141, - "step": 15951 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.859321947071285e-08, - "loss": 0.5202, - "step": 15952 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.834384572368736e-08, - "loss": 0.4284, - "step": 15953 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.809486667714195e-08, - "loss": 0.4294, - "step": 15954 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.784628234097868e-08, - "loss": 0.4345, - "step": 15955 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.759809272508856e-08, - "loss": 0.4393, - "step": 15956 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.735029783934367e-08, - "loss": 0.4455, - "step": 15957 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.71028976936028e-08, - "loss": 0.4513, - "step": 15958 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.685589229770806e-08, - "loss": 0.4537, - "step": 15959 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.660928166148496e-08, - "loss": 0.5436, - "step": 15960 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.636306579474451e-08, - "loss": 0.3412, - "step": 15961 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.611724470727999e-08, - "loss": 0.3667, - "step": 15962 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.587181840887358e-08, - "loss": 0.3907, - "step": 15963 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.562678690928637e-08, - "loss": 0.4424, - "step": 15964 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.538215021826723e-08, - "loss": 0.4763, - "step": 15965 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.513790834554835e-08, - "loss": 0.4141, - "step": 15966 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.489406130084532e-08, - "loss": 0.5012, - "step": 15967 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.465060909385924e-08, - "loss": 0.4387, - "step": 15968 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.440755173427461e-08, - "loss": 0.4808, - "step": 15969 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.416488923176146e-08, - "loss": 0.4579, - "step": 15970 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.392262159597318e-08, - "loss": 0.4467, - "step": 15971 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.368074883654763e-08, - "loss": 0.3999, - "step": 15972 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.343927096310488e-08, - "loss": 0.451, - "step": 15973 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.319818798525502e-08, - "loss": 0.4422, - "step": 15974 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.295749991258595e-08, - "loss": 0.424, - "step": 15975 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.27172067546733e-08, - "loss": 0.439, - "step": 15976 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.247730852107727e-08, - "loss": 0.3751, - "step": 15977 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.223780522134016e-08, - "loss": 0.4231, - "step": 15978 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.199869686498994e-08, - "loss": 0.4819, - "step": 15979 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.175998346154012e-08, - "loss": 0.4615, - "step": 15980 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.152166502048529e-08, - "loss": 0.4608, - "step": 15981 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.12837415513079e-08, - "loss": 0.4157, - "step": 15982 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.104621306347148e-08, - "loss": 0.4209, - "step": 15983 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.080907956642624e-08, - "loss": 0.4723, - "step": 15984 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.057234106960464e-08, - "loss": 0.4402, - "step": 15985 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.033599758242693e-08, - "loss": 0.4134, - "step": 15986 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 7.010004911429335e-08, - "loss": 0.519, - "step": 15987 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.986449567458975e-08, - "loss": 0.388, - "step": 15988 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.962933727268861e-08, - "loss": 0.423, - "step": 15989 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.939457391794246e-08, - "loss": 0.4234, - "step": 15990 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.916020561969384e-08, - "loss": 0.3685, - "step": 15991 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.892623238726415e-08, - "loss": 0.4239, - "step": 15992 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.869265422996262e-08, - "loss": 0.4279, - "step": 15993 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.845947115707963e-08, - "loss": 0.4996, - "step": 15994 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.822668317789327e-08, - "loss": 0.4726, - "step": 15995 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.799429030166283e-08, - "loss": 0.4985, - "step": 15996 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.776229253763423e-08, - "loss": 0.4265, - "step": 15997 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.753068989503675e-08, - "loss": 0.4835, - "step": 15998 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.729948238308304e-08, - "loss": 0.4562, - "step": 15999 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.70686700109724e-08, - "loss": 0.4015, - "step": 16000 - }, - { - "epoch": 2.89, - "grad_norm": 0.0, - "learning_rate": 6.683825278788525e-08, - "loss": 0.4633, - "step": 16001 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.660823072298983e-08, - "loss": 0.3846, - "step": 16002 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.637860382543548e-08, - "loss": 0.4024, - "step": 16003 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.614937210435712e-08, - "loss": 0.4165, - "step": 16004 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.592053556887523e-08, - "loss": 0.4392, - "step": 16005 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.569209422809141e-08, - "loss": 0.4985, - "step": 16006 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.546404809109508e-08, - "loss": 0.4511, - "step": 16007 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.523639716695673e-08, - "loss": 0.4854, - "step": 16008 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.500914146473469e-08, - "loss": 0.4246, - "step": 16009 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.47822809934695e-08, - "loss": 0.393, - "step": 16010 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.455581576218395e-08, - "loss": 0.4395, - "step": 16011 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.43297457798886e-08, - "loss": 0.4344, - "step": 16012 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.410407105557625e-08, - "loss": 0.426, - "step": 16013 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.38787915982253e-08, - "loss": 0.4508, - "step": 16014 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.365390741679855e-08, - "loss": 0.4594, - "step": 16015 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.342941852023999e-08, - "loss": 0.3835, - "step": 16016 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.320532491748244e-08, - "loss": 0.4693, - "step": 16017 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.298162661743878e-08, - "loss": 0.4494, - "step": 16018 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.275832362900969e-08, - "loss": 0.4412, - "step": 16019 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.253541596107804e-08, - "loss": 0.4818, - "step": 16020 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.231290362251009e-08, - "loss": 0.5208, - "step": 16021 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.209078662215984e-08, - "loss": 0.3602, - "step": 16022 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.186906496886358e-08, - "loss": 0.4504, - "step": 16023 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.16477386714398e-08, - "loss": 0.4777, - "step": 16024 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.14268077386948e-08, - "loss": 0.3815, - "step": 16025 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.120627217941711e-08, - "loss": 0.4381, - "step": 16026 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.098613200237969e-08, - "loss": 0.4227, - "step": 16027 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.07663872163411e-08, - "loss": 0.39, - "step": 16028 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.054703783004101e-08, - "loss": 0.4268, - "step": 16029 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.032808385220801e-08, - "loss": 0.3918, - "step": 16030 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 6.010952529155178e-08, - "loss": 0.4876, - "step": 16031 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.98913621567665e-08, - "loss": 0.4069, - "step": 16032 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.967359445653075e-08, - "loss": 0.4504, - "step": 16033 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.945622219950875e-08, - "loss": 0.4535, - "step": 16034 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.923924539434689e-08, - "loss": 0.3907, - "step": 16035 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.9022664049677156e-08, - "loss": 0.4525, - "step": 16036 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.8806478174116e-08, - "loss": 0.4039, - "step": 16037 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.85906877762632e-08, - "loss": 0.4622, - "step": 16038 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.8375292864701894e-08, - "loss": 0.4584, - "step": 16039 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.8160293448005225e-08, - "loss": 0.4097, - "step": 16040 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.7945689534721906e-08, - "loss": 0.4572, - "step": 16041 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.773148113339066e-08, - "loss": 0.4276, - "step": 16042 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.7517668252533575e-08, - "loss": 0.4484, - "step": 16043 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.7304250900654945e-08, - "loss": 0.4523, - "step": 16044 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.709122908624687e-08, - "loss": 0.469, - "step": 16045 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.687860281778368e-08, - "loss": 0.338, - "step": 16046 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.666637210372194e-08, - "loss": 0.4841, - "step": 16047 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.6454536952507134e-08, - "loss": 0.3917, - "step": 16048 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.6243097372563614e-08, - "loss": 0.4023, - "step": 16049 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.6032053372304665e-08, - "loss": 0.4634, - "step": 16050 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.582140496012578e-08, - "loss": 0.3969, - "step": 16051 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.561115214440693e-08, - "loss": 0.3393, - "step": 16052 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.5401294933511426e-08, - "loss": 0.4718, - "step": 16053 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.5191833335789257e-08, - "loss": 0.3989, - "step": 16054 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.4982767359572645e-08, - "loss": 0.4293, - "step": 16055 - }, - { - "epoch": 2.9, - "grad_norm": 0.0, - "learning_rate": 5.477409701317715e-08, - "loss": 0.4765, - "step": 16056 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.456582230490615e-08, - "loss": 0.44, - "step": 16057 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.435794324304411e-08, - "loss": 0.387, - "step": 16058 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.415045983585998e-08, - "loss": 0.4386, - "step": 16059 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.394337209160938e-08, - "loss": 0.4678, - "step": 16060 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.373668001853016e-08, - "loss": 0.413, - "step": 16061 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.353038362484464e-08, - "loss": 0.5345, - "step": 16062 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.332448291875958e-08, - "loss": 0.3766, - "step": 16063 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.3118977908466205e-08, - "loss": 0.467, - "step": 16064 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.291386860214021e-08, - "loss": 0.3582, - "step": 16065 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.27091550079395e-08, - "loss": 0.4032, - "step": 16066 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.2504837134010886e-08, - "loss": 0.3956, - "step": 16067 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.230091498848011e-08, - "loss": 0.5237, - "step": 16068 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.209738857946067e-08, - "loss": 0.3793, - "step": 16069 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.189425791504832e-08, - "loss": 0.47, - "step": 16070 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.169152300332436e-08, - "loss": 0.4275, - "step": 16071 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.148918385235457e-08, - "loss": 0.509, - "step": 16072 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.128724047018696e-08, - "loss": 0.4767, - "step": 16073 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.108569286485732e-08, - "loss": 0.4386, - "step": 16074 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.0884541044381454e-08, - "loss": 0.4047, - "step": 16075 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.0683785016761856e-08, - "loss": 0.4873, - "step": 16076 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.048342478998547e-08, - "loss": 0.4706, - "step": 16077 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.028346037202481e-08, - "loss": 0.4504, - "step": 16078 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 5.0083891770831285e-08, - "loss": 0.4453, - "step": 16079 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.9884718994345214e-08, - "loss": 0.3334, - "step": 16080 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.9685942050491376e-08, - "loss": 0.4107, - "step": 16081 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.948756094717566e-08, - "loss": 0.5079, - "step": 16082 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.928957569229176e-08, - "loss": 0.4316, - "step": 16083 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.9091986293715586e-08, - "loss": 0.4356, - "step": 16084 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.889479275930642e-08, - "loss": 0.4905, - "step": 16085 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.86979950969102e-08, - "loss": 0.4155, - "step": 16086 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.8501593314354e-08, - "loss": 0.4247, - "step": 16087 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.830558741945268e-08, - "loss": 0.3964, - "step": 16088 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.810997742000334e-08, - "loss": 0.3864, - "step": 16089 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.791476332378864e-08, - "loss": 0.4054, - "step": 16090 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.7719945138572366e-08, - "loss": 0.3723, - "step": 16091 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.75255228721061e-08, - "loss": 0.4197, - "step": 16092 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.7331496532124767e-08, - "loss": 0.3883, - "step": 16093 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.7137866126345524e-08, - "loss": 0.4139, - "step": 16094 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.694463166247221e-08, - "loss": 0.5132, - "step": 16095 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.6751793148192005e-08, - "loss": 0.4076, - "step": 16096 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.655935059117655e-08, - "loss": 0.4008, - "step": 16097 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.636730399908196e-08, - "loss": 0.5408, - "step": 16098 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.6175653379546545e-08, - "loss": 0.4108, - "step": 16099 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.5984398740195336e-08, - "loss": 0.4016, - "step": 16100 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.579354008863779e-08, - "loss": 0.4581, - "step": 16101 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.560307743246562e-08, - "loss": 0.4563, - "step": 16102 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.541301077925497e-08, - "loss": 0.487, - "step": 16103 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.522334013656871e-08, - "loss": 0.4146, - "step": 16104 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.5034065511951884e-08, - "loss": 0.4209, - "step": 16105 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.484518691293294e-08, - "loss": 0.4638, - "step": 16106 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.4656704347026956e-08, - "loss": 0.4421, - "step": 16107 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.4468617821731285e-08, - "loss": 0.4635, - "step": 16108 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.428092734452882e-08, - "loss": 0.4876, - "step": 16109 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.409363292288582e-08, - "loss": 0.4741, - "step": 16110 - }, - { - "epoch": 2.91, - "grad_norm": 0.0, - "learning_rate": 4.3906734564254093e-08, - "loss": 0.4246, - "step": 16111 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.372023227606881e-08, - "loss": 0.5418, - "step": 16112 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.353412606574847e-08, - "loss": 0.4352, - "step": 16113 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.334841594069605e-08, - "loss": 0.4587, - "step": 16114 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.316310190830009e-08, - "loss": 0.4222, - "step": 16115 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.297818397593467e-08, - "loss": 0.4705, - "step": 16116 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.279366215095282e-08, - "loss": 0.4179, - "step": 16117 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.260953644069754e-08, - "loss": 0.4231, - "step": 16118 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.242580685249298e-08, - "loss": 0.4471, - "step": 16119 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.2242473393647733e-08, - "loss": 0.3888, - "step": 16120 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.205953607145485e-08, - "loss": 0.54, - "step": 16121 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.1876994893194075e-08, - "loss": 0.3921, - "step": 16122 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.1694849866125155e-08, - "loss": 0.4299, - "step": 16123 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.1513100997493396e-08, - "loss": 0.4084, - "step": 16124 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.1331748294530794e-08, - "loss": 0.3854, - "step": 16125 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.115079176445269e-08, - "loss": 0.3882, - "step": 16126 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.097023141445555e-08, - "loss": 0.3816, - "step": 16127 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.079006725172363e-08, - "loss": 0.4518, - "step": 16128 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.0610299283423415e-08, - "loss": 0.5014, - "step": 16129 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.0430927516706965e-08, - "loss": 0.3963, - "step": 16130 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.025195195870968e-08, - "loss": 0.4858, - "step": 16131 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 4.007337261655253e-08, - "loss": 0.4981, - "step": 16132 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.989518949733873e-08, - "loss": 0.3826, - "step": 16133 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.971740260815704e-08, - "loss": 0.4636, - "step": 16134 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.9540011956079594e-08, - "loss": 0.4068, - "step": 16135 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.9363017548162965e-08, - "loss": 0.4231, - "step": 16136 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.91864193914504e-08, - "loss": 0.442, - "step": 16137 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.9010217492965184e-08, - "loss": 0.4555, - "step": 16138 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.883441185971837e-08, - "loss": 0.4112, - "step": 16139 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.865900249870214e-08, - "loss": 0.3952, - "step": 16140 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.848398941689535e-08, - "loss": 0.4338, - "step": 16141 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.830937262126133e-08, - "loss": 0.4788, - "step": 16142 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.8135152118745634e-08, - "loss": 0.3849, - "step": 16143 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.796132791627827e-08, - "loss": 0.5246, - "step": 16144 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.778790002077593e-08, - "loss": 0.4332, - "step": 16145 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.761486843913753e-08, - "loss": 0.45, - "step": 16146 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.744223317824647e-08, - "loss": 0.4794, - "step": 16147 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.7269994244969466e-08, - "loss": 0.404, - "step": 16148 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.7098151646158817e-08, - "loss": 0.4376, - "step": 16149 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.6926705388651286e-08, - "loss": 0.4289, - "step": 16150 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.675565547926807e-08, - "loss": 0.4597, - "step": 16151 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.658500192481151e-08, - "loss": 0.3606, - "step": 16152 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.641474473207285e-08, - "loss": 0.4288, - "step": 16153 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.624488390782333e-08, - "loss": 0.4101, - "step": 16154 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.607541945882198e-08, - "loss": 0.4037, - "step": 16155 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.590635139180898e-08, - "loss": 0.4325, - "step": 16156 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.5737679713511166e-08, - "loss": 0.4606, - "step": 16157 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.556940443063761e-08, - "loss": 0.4026, - "step": 16158 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.5401525549884074e-08, - "loss": 0.4818, - "step": 16159 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.5234043077927435e-08, - "loss": 0.5406, - "step": 16160 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.506695702143237e-08, - "loss": 0.4176, - "step": 16161 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.490026738704355e-08, - "loss": 0.3993, - "step": 16162 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.473397418139346e-08, - "loss": 0.3897, - "step": 16163 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.4568077411096804e-08, - "loss": 0.4147, - "step": 16164 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.440257708275496e-08, - "loss": 0.5022, - "step": 16165 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.4237473202949345e-08, - "loss": 0.4624, - "step": 16166 - }, - { - "epoch": 2.92, - "grad_norm": 0.0, - "learning_rate": 3.407276577825025e-08, - "loss": 0.4613, - "step": 16167 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.3908454815207994e-08, - "loss": 0.5154, - "step": 16168 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.3744540320361785e-08, - "loss": 0.3769, - "step": 16169 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.358102230022975e-08, - "loss": 0.3845, - "step": 16170 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.34179007613189e-08, - "loss": 0.3718, - "step": 16171 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.3255175710117385e-08, - "loss": 0.4483, - "step": 16172 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.309284715309891e-08, - "loss": 0.3968, - "step": 16173 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.293091509672053e-08, - "loss": 0.3517, - "step": 16174 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.276937954742487e-08, - "loss": 0.4435, - "step": 16175 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.260824051163902e-08, - "loss": 0.4271, - "step": 16176 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.244749799577229e-08, - "loss": 0.489, - "step": 16177 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.228715200621957e-08, - "loss": 0.4326, - "step": 16178 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.2127202549359085e-08, - "loss": 0.4682, - "step": 16179 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.196764963155463e-08, - "loss": 0.386, - "step": 16180 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.180849325915336e-08, - "loss": 0.3947, - "step": 16181 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.1649733438486874e-08, - "loss": 0.4249, - "step": 16182 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.149137017587012e-08, - "loss": 0.3865, - "step": 16183 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.133340347760361e-08, - "loss": 0.4231, - "step": 16184 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.117583334997232e-08, - "loss": 0.4509, - "step": 16185 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.1018659799243456e-08, - "loss": 0.401, - "step": 16186 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.086188283167091e-08, - "loss": 0.4573, - "step": 16187 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.070550245348969e-08, - "loss": 0.4525, - "step": 16188 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.054951867092259e-08, - "loss": 0.456, - "step": 16189 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.0393931490173555e-08, - "loss": 0.3746, - "step": 16190 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.023874091743317e-08, - "loss": 0.4532, - "step": 16191 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 3.00839469588754e-08, - "loss": 0.3734, - "step": 16192 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.992954962065642e-08, - "loss": 0.4194, - "step": 16193 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.9775548908920205e-08, - "loss": 0.4116, - "step": 16194 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.962194482979297e-08, - "loss": 0.3997, - "step": 16195 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.946873738938427e-08, - "loss": 0.4759, - "step": 16196 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.9315926593790344e-08, - "loss": 0.4974, - "step": 16197 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.9163512449089658e-08, - "loss": 0.4072, - "step": 16198 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.9011494961345145e-08, - "loss": 0.4124, - "step": 16199 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.885987413660418e-08, - "loss": 0.4472, - "step": 16200 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.8708649980899728e-08, - "loss": 0.3453, - "step": 16201 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.8557822500245856e-08, - "loss": 0.4339, - "step": 16202 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.8407391700645547e-08, - "loss": 0.3994, - "step": 16203 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.825735758807957e-08, - "loss": 0.5083, - "step": 16204 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.8107720168519815e-08, - "loss": 0.3718, - "step": 16205 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7958479447917076e-08, - "loss": 0.4203, - "step": 16206 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7809635432209935e-08, - "loss": 0.4649, - "step": 16207 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7661188127318107e-08, - "loss": 0.4488, - "step": 16208 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7513137539147972e-08, - "loss": 0.4234, - "step": 16209 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7365483673588157e-08, - "loss": 0.3726, - "step": 16210 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.721822653651396e-08, - "loss": 0.4945, - "step": 16211 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.7071366133782917e-08, - "loss": 0.409, - "step": 16212 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6924902471238134e-08, - "loss": 0.4306, - "step": 16213 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6778835554706063e-08, - "loss": 0.4112, - "step": 16214 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6633165389996495e-08, - "loss": 0.4783, - "step": 16215 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.64878919829048e-08, - "loss": 0.4792, - "step": 16216 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6343015339209687e-08, - "loss": 0.4326, - "step": 16217 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6198535464675435e-08, - "loss": 0.4841, - "step": 16218 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.6054452365049665e-08, - "loss": 0.4265, - "step": 16219 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.5910766046064462e-08, - "loss": 0.4012, - "step": 16220 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.5767476513435253e-08, - "loss": 0.496, - "step": 16221 - }, - { - "epoch": 2.93, - "grad_norm": 0.0, - "learning_rate": 2.5624583772863032e-08, - "loss": 0.4882, - "step": 16222 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.548208783003103e-08, - "loss": 0.4571, - "step": 16223 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.5339988690610274e-08, - "loss": 0.4549, - "step": 16224 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.519828636025068e-08, - "loss": 0.3584, - "step": 16225 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.5056980844592183e-08, - "loss": 0.4808, - "step": 16226 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.4916072149254732e-08, - "loss": 0.3874, - "step": 16227 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.4775560279843845e-08, - "loss": 0.4132, - "step": 16228 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.4635445241950605e-08, - "loss": 0.3857, - "step": 16229 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.449572704114722e-08, - "loss": 0.3544, - "step": 16230 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.4356405682993688e-08, - "loss": 0.3922, - "step": 16231 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.4217481173030023e-08, - "loss": 0.4509, - "step": 16232 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.407895351678513e-08, - "loss": 0.4499, - "step": 16233 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.3940822719770163e-08, - "loss": 0.3744, - "step": 16234 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.38030887874785e-08, - "loss": 0.4233, - "step": 16235 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.3665751725389098e-08, - "loss": 0.3952, - "step": 16236 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.3528811538968687e-08, - "loss": 0.3824, - "step": 16237 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.3392268233660697e-08, - "loss": 0.426, - "step": 16238 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.325612181489967e-08, - "loss": 0.3846, - "step": 16239 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.3120372288102376e-08, - "loss": 0.4584, - "step": 16240 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.298501965866673e-08, - "loss": 0.3707, - "step": 16241 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.2850063931979525e-08, - "loss": 0.4198, - "step": 16242 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.2715505113407588e-08, - "loss": 0.4169, - "step": 16243 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.2581343208305518e-08, - "loss": 0.4384, - "step": 16244 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.244757822201016e-08, - "loss": 0.501, - "step": 16245 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.23142101598417e-08, - "loss": 0.4071, - "step": 16246 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.2181239027108114e-08, - "loss": 0.385, - "step": 16247 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.204866482909629e-08, - "loss": 0.4302, - "step": 16248 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.1916487571082e-08, - "loss": 0.4787, - "step": 16249 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.1784707258324378e-08, - "loss": 0.4858, - "step": 16250 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.165332389606367e-08, - "loss": 0.434, - "step": 16251 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.1522337489527922e-08, - "loss": 0.5401, - "step": 16252 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.1391748043927406e-08, - "loss": 0.4079, - "step": 16253 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.126155556445797e-08, - "loss": 0.455, - "step": 16254 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.113176005629769e-08, - "loss": 0.4821, - "step": 16255 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.1002361524611326e-08, - "loss": 0.4429, - "step": 16256 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.087335997454587e-08, - "loss": 0.4473, - "step": 16257 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.0744755411233887e-08, - "loss": 0.4046, - "step": 16258 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.061654783979017e-08, - "loss": 0.4167, - "step": 16259 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.0488737265316194e-08, - "loss": 0.4175, - "step": 16260 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.0361323692895675e-08, - "loss": 0.4834, - "step": 16261 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.0234307127597887e-08, - "loss": 0.5666, - "step": 16262 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 2.0107687574475453e-08, - "loss": 0.4216, - "step": 16263 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.9981465038566573e-08, - "loss": 0.4661, - "step": 16264 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.9855639524890558e-08, - "loss": 0.3952, - "step": 16265 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.9730211038454518e-08, - "loss": 0.3528, - "step": 16266 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.96051795842489e-08, - "loss": 0.5421, - "step": 16267 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.948054516724529e-08, - "loss": 0.4501, - "step": 16268 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.935630779240416e-08, - "loss": 0.4746, - "step": 16269 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.9232467464666006e-08, - "loss": 0.4324, - "step": 16270 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.91090241889591e-08, - "loss": 0.3963, - "step": 16271 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.898597797019286e-08, - "loss": 0.3889, - "step": 16272 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.886332881326225e-08, - "loss": 0.5314, - "step": 16273 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.8741076723047815e-08, - "loss": 0.4493, - "step": 16274 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.8619221704413438e-08, - "loss": 0.3668, - "step": 16275 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.8497763762203026e-08, - "loss": 0.4148, - "step": 16276 - }, - { - "epoch": 2.94, - "grad_norm": 0.0, - "learning_rate": 1.8376702901252708e-08, - "loss": 0.415, - "step": 16277 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.82560391263753e-08, - "loss": 0.4919, - "step": 16278 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.8135772442373635e-08, - "loss": 0.4244, - "step": 16279 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.8015902854029432e-08, - "loss": 0.4635, - "step": 16280 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.7896430366114435e-08, - "loss": 0.4748, - "step": 16281 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.777735498337818e-08, - "loss": 0.4239, - "step": 16282 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.765867671056021e-08, - "loss": 0.4537, - "step": 16283 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.7540395552380073e-08, - "loss": 0.4899, - "step": 16284 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.7422511513544015e-08, - "loss": 0.4061, - "step": 16285 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.7305024598741616e-08, - "loss": 0.4282, - "step": 16286 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.718793481264691e-08, - "loss": 0.4111, - "step": 16287 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.7071242159917288e-08, - "loss": 0.3937, - "step": 16288 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.69549466451957e-08, - "loss": 0.4583, - "step": 16289 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.683904827310845e-08, - "loss": 0.4362, - "step": 16290 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.6723547048265177e-08, - "loss": 0.3995, - "step": 16291 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.6608442975262205e-08, - "loss": 0.4174, - "step": 16292 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.649373605867699e-08, - "loss": 0.4077, - "step": 16293 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.6379426303074765e-08, - "loss": 0.4396, - "step": 16294 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.626551371300078e-08, - "loss": 0.4288, - "step": 16295 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.615199829298919e-08, - "loss": 0.462, - "step": 16296 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.6038880047553052e-08, - "loss": 0.3774, - "step": 16297 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5926158981194318e-08, - "loss": 0.3519, - "step": 16298 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5813835098396068e-08, - "loss": 0.472, - "step": 16299 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5701908403628065e-08, - "loss": 0.4483, - "step": 16300 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5590378901342295e-08, - "loss": 0.3707, - "step": 16301 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5479246595975215e-08, - "loss": 0.4359, - "step": 16302 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5368511491947736e-08, - "loss": 0.4455, - "step": 16303 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5258173593665215e-08, - "loss": 0.3978, - "step": 16304 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.514823290551859e-08, - "loss": 0.5777, - "step": 16305 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.5038689431879915e-08, - "loss": 0.4028, - "step": 16306 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.492954317710682e-08, - "loss": 0.4544, - "step": 16307 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4820794145542495e-08, - "loss": 0.3834, - "step": 16308 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4712442341511257e-08, - "loss": 0.4466, - "step": 16309 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4604487769326326e-08, - "loss": 0.3446, - "step": 16310 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.449693043327982e-08, - "loss": 0.4309, - "step": 16311 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4389770337651653e-08, - "loss": 0.3788, - "step": 16312 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.428300748670508e-08, - "loss": 0.4728, - "step": 16313 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4176641884686703e-08, - "loss": 0.4362, - "step": 16314 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.4070673535828694e-08, - "loss": 0.4474, - "step": 16315 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3965102444345458e-08, - "loss": 0.424, - "step": 16316 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3859928614438079e-08, - "loss": 0.4324, - "step": 16317 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3755152050289877e-08, - "loss": 0.4459, - "step": 16318 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3650772756068631e-08, - "loss": 0.4526, - "step": 16319 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3546790735927683e-08, - "loss": 0.39, - "step": 16320 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3443205994002617e-08, - "loss": 0.4024, - "step": 16321 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3340018534414578e-08, - "loss": 0.51, - "step": 16322 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.3237228361269172e-08, - "loss": 0.4958, - "step": 16323 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.313483547865535e-08, - "loss": 0.5298, - "step": 16324 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.303283989064541e-08, - "loss": 0.3521, - "step": 16325 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.2931241601298327e-08, - "loss": 0.45, - "step": 16326 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.2830040614655314e-08, - "loss": 0.4682, - "step": 16327 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.272923693474204e-08, - "loss": 0.4541, - "step": 16328 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.262883056556974e-08, - "loss": 0.4129, - "step": 16329 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.2528821511130773e-08, - "loss": 0.4116, - "step": 16330 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.2429209775405294e-08, - "loss": 0.4216, - "step": 16331 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.2329995362354574e-08, - "loss": 0.3821, - "step": 16332 - }, - { - "epoch": 2.95, - "grad_norm": 0.0, - "learning_rate": 1.223117827592768e-08, - "loss": 0.3815, - "step": 16333 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.213275852005369e-08, - "loss": 0.4252, - "step": 16334 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.2034736098649469e-08, - "loss": 0.365, - "step": 16335 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1937111015613013e-08, - "loss": 0.5158, - "step": 16336 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.183988327482899e-08, - "loss": 0.4521, - "step": 16337 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1743052880165418e-08, - "loss": 0.3659, - "step": 16338 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.164661983547366e-08, - "loss": 0.3833, - "step": 16339 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1550584144590648e-08, - "loss": 0.4513, - "step": 16340 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1454945811336659e-08, - "loss": 0.4534, - "step": 16341 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1359704839515317e-08, - "loss": 0.5033, - "step": 16342 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1264861232918034e-08, - "loss": 0.4414, - "step": 16343 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.1170414995315127e-08, - "loss": 0.4571, - "step": 16344 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.10763661304647e-08, - "loss": 0.4849, - "step": 16345 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0982714642109316e-08, - "loss": 0.4322, - "step": 16346 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0889460533972662e-08, - "loss": 0.4071, - "step": 16347 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0796603809766216e-08, - "loss": 0.4383, - "step": 16348 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0704144473183687e-08, - "loss": 0.3816, - "step": 16349 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0612082527902135e-08, - "loss": 0.3856, - "step": 16350 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0520417977585295e-08, - "loss": 0.364, - "step": 16351 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0429150825878032e-08, - "loss": 0.4882, - "step": 16352 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0338281076414103e-08, - "loss": 0.4366, - "step": 16353 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.0247808732805065e-08, - "loss": 0.3956, - "step": 16354 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.015773379865248e-08, - "loss": 0.4019, - "step": 16355 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 1.006805627753793e-08, - "loss": 0.3745, - "step": 16356 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.97877617303078e-09, - "loss": 0.4145, - "step": 16357 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.889893488680413e-09, - "loss": 0.4304, - "step": 16358 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.801408228025111e-09, - "loss": 0.4757, - "step": 16359 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.71332039458428e-09, - "loss": 0.4003, - "step": 16360 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.625629991860674e-09, - "loss": 0.3676, - "step": 16361 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.538337023344834e-09, - "loss": 0.4606, - "step": 16362 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.451441492508429e-09, - "loss": 0.3178, - "step": 16363 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.36494340280869e-09, - "loss": 0.4067, - "step": 16364 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.278842757686202e-09, - "loss": 0.4658, - "step": 16365 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.193139560566e-09, - "loss": 0.4016, - "step": 16366 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.107833814858691e-09, - "loss": 0.292, - "step": 16367 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 9.022925523956006e-09, - "loss": 0.431, - "step": 16368 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.938414691237463e-09, - "loss": 0.441, - "step": 16369 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.854301320064817e-09, - "loss": 0.4741, - "step": 16370 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.770585413783173e-09, - "loss": 0.4853, - "step": 16371 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.687266975723196e-09, - "loss": 0.4494, - "step": 16372 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.604346009201125e-09, - "loss": 0.4705, - "step": 16373 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.521822517513212e-09, - "loss": 0.5083, - "step": 16374 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.439696503944605e-09, - "loss": 0.4312, - "step": 16375 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.357967971760472e-09, - "loss": 0.5183, - "step": 16376 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.276636924213766e-09, - "loss": 0.4528, - "step": 16377 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.195703364538566e-09, - "loss": 0.4182, - "step": 16378 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.115167295955628e-09, - "loss": 0.4335, - "step": 16379 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 8.035028721667948e-09, - "loss": 0.4269, - "step": 16380 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.955287644864084e-09, - "loss": 0.3847, - "step": 16381 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.875944068715946e-09, - "loss": 0.4264, - "step": 16382 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.796997996381007e-09, - "loss": 0.4356, - "step": 16383 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.718449430998975e-09, - "loss": 0.4208, - "step": 16384 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.640298375694022e-09, - "loss": 0.4253, - "step": 16385 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.56254483357588e-09, - "loss": 0.4383, - "step": 16386 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.48518880773874e-09, - "loss": 0.5205, - "step": 16387 - }, - { - "epoch": 2.96, - "grad_norm": 0.0, - "learning_rate": 7.408230301257924e-09, - "loss": 0.4499, - "step": 16388 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 7.331669317196533e-09, - "loss": 0.4291, - "step": 16389 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 7.255505858599909e-09, - "loss": 0.4429, - "step": 16390 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 7.179739928496743e-09, - "loss": 0.52, - "step": 16391 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 7.1043715299035085e-09, - "loss": 0.3878, - "step": 16392 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 7.029400665815589e-09, - "loss": 0.4017, - "step": 16393 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.954827339218373e-09, - "loss": 0.4143, - "step": 16394 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.880651553076157e-09, - "loss": 0.3837, - "step": 16395 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.806873310342133e-09, - "loss": 0.3817, - "step": 16396 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.7334926139484006e-09, - "loss": 0.4268, - "step": 16397 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.660509466817067e-09, - "loss": 0.4059, - "step": 16398 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.587923871849144e-09, - "loss": 0.4086, - "step": 16399 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.5157358319345446e-09, - "loss": 0.3962, - "step": 16400 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.443945349942082e-09, - "loss": 0.4063, - "step": 16401 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.372552428730583e-09, - "loss": 0.4472, - "step": 16402 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.301557071138886e-09, - "loss": 0.4276, - "step": 16403 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.2309592799914e-09, - "loss": 0.5021, - "step": 16404 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.160759058095878e-09, - "loss": 0.312, - "step": 16405 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.090956408246751e-09, - "loss": 0.4901, - "step": 16406 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 6.021551333219578e-09, - "loss": 0.408, - "step": 16407 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.952543835775482e-09, - "loss": 0.4441, - "step": 16408 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.883933918660045e-09, - "loss": 0.4091, - "step": 16409 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.815721584602196e-09, - "loss": 0.3964, - "step": 16410 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.7479068363164306e-09, - "loss": 0.4101, - "step": 16411 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.680489676500589e-09, - "loss": 0.4603, - "step": 16412 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.613470107834751e-09, - "loss": 0.4521, - "step": 16413 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.546848132987892e-09, - "loss": 0.4248, - "step": 16414 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.480623754607894e-09, - "loss": 0.3535, - "step": 16415 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.4147969753304275e-09, - "loss": 0.4054, - "step": 16416 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.349367797773397e-09, - "loss": 0.4948, - "step": 16417 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.284336224541387e-09, - "loss": 0.3793, - "step": 16418 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.219702258220105e-09, - "loss": 0.4306, - "step": 16419 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.155465901380829e-09, - "loss": 0.4186, - "step": 16420 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.091627156579293e-09, - "loss": 0.5008, - "step": 16421 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 5.028186026354576e-09, - "loss": 0.389, - "step": 16422 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.965142513231325e-09, - "loss": 0.4665, - "step": 16423 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.902496619717534e-09, - "loss": 0.3861, - "step": 16424 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.840248348304544e-09, - "loss": 0.373, - "step": 16425 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.7783977014692614e-09, - "loss": 0.4557, - "step": 16426 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.716944681671942e-09, - "loss": 0.4576, - "step": 16427 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.655889291357296e-09, - "loss": 0.4465, - "step": 16428 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.595231532955602e-09, - "loss": 0.4387, - "step": 16429 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.534971408877153e-09, - "loss": 0.369, - "step": 16430 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.475108921521143e-09, - "loss": 0.4306, - "step": 16431 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.415644073268999e-09, - "loss": 0.3848, - "step": 16432 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.356576866485496e-09, - "loss": 0.3637, - "step": 16433 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.297907303520976e-09, - "loss": 0.4957, - "step": 16434 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.239635386709129e-09, - "loss": 0.5213, - "step": 16435 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.1817611183692096e-09, - "loss": 0.516, - "step": 16436 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.124284500801601e-09, - "loss": 0.4513, - "step": 16437 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.067205536294472e-09, - "loss": 0.449, - "step": 16438 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 4.010524227117119e-09, - "loss": 0.3992, - "step": 16439 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 3.954240575525514e-09, - "loss": 0.4479, - "step": 16440 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 3.898354583758979e-09, - "loss": 0.4303, - "step": 16441 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 3.842866254039068e-09, - "loss": 0.3954, - "step": 16442 - }, - { - "epoch": 2.97, - "grad_norm": 0.0, - "learning_rate": 3.787775588575126e-09, - "loss": 0.4563, - "step": 16443 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.733082589557624e-09, - "loss": 0.455, - "step": 16444 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.678787259162597e-09, - "loss": 0.4172, - "step": 16445 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.624889599550541e-09, - "loss": 0.4928, - "step": 16446 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.5713896128641846e-09, - "loss": 0.5254, - "step": 16447 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.518287301234047e-09, - "loss": 0.4726, - "step": 16448 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.4655826667706617e-09, - "loss": 0.4125, - "step": 16449 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.41327571157124e-09, - "loss": 0.4341, - "step": 16450 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.3613664377174503e-09, - "loss": 0.4461, - "step": 16451 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.3098548472731975e-09, - "loss": 0.3674, - "step": 16452 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.2587409422879525e-09, - "loss": 0.4461, - "step": 16453 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.2080247247956443e-09, - "loss": 0.4455, - "step": 16454 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.157706196813548e-09, - "loss": 0.4619, - "step": 16455 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.1077853603433963e-09, - "loss": 0.4381, - "step": 16456 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.0582622173713773e-09, - "loss": 0.4031, - "step": 16457 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 3.009136769867027e-09, - "loss": 0.4412, - "step": 16458 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.9604090197854487e-09, - "loss": 0.4551, - "step": 16459 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.912078969065091e-09, - "loss": 0.4093, - "step": 16460 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.8641466196266397e-09, - "loss": 0.4303, - "step": 16461 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.8166119733796794e-09, - "loss": 0.4597, - "step": 16462 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.7694750322138085e-09, - "loss": 0.3547, - "step": 16463 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.7227357980041947e-09, - "loss": 0.4193, - "step": 16464 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.6763942726104607e-09, - "loss": 0.4297, - "step": 16465 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.6304504578766875e-09, - "loss": 0.4208, - "step": 16466 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.5849043556303022e-09, - "loss": 0.4042, - "step": 16467 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.539755967682078e-09, - "loss": 0.4215, - "step": 16468 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.4950052958283567e-09, - "loss": 0.4175, - "step": 16469 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.4506523418510454e-09, - "loss": 0.4393, - "step": 16470 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.4066971075131784e-09, - "loss": 0.3948, - "step": 16471 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.3631395945644676e-09, - "loss": 0.4094, - "step": 16472 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.31997980473575e-09, - "loss": 0.3985, - "step": 16473 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.277217739746762e-09, - "loss": 0.4844, - "step": 16474 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.2348534012961443e-09, - "loss": 0.4307, - "step": 16475 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.192886791069215e-09, - "loss": 0.4147, - "step": 16476 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.1513179107379712e-09, - "loss": 0.4507, - "step": 16477 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.1101467619544237e-09, - "loss": 0.4199, - "step": 16478 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.0693733463561516e-09, - "loss": 0.4343, - "step": 16479 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 2.028997665565191e-09, - "loss": 0.3791, - "step": 16480 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.9890197211880347e-09, - "loss": 0.4129, - "step": 16481 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.949439514816742e-09, - "loss": 0.422, - "step": 16482 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.9102570480233893e-09, - "loss": 0.4449, - "step": 16483 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.87147232236784e-09, - "loss": 0.4383, - "step": 16484 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.8330853393921933e-09, - "loss": 0.4341, - "step": 16485 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.7950961006252265e-09, - "loss": 0.4371, - "step": 16486 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.757504607576843e-09, - "loss": 0.4391, - "step": 16487 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.7203108617425136e-09, - "loss": 0.4763, - "step": 16488 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.6835148646021648e-09, - "loss": 0.3838, - "step": 16489 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.6471166176201814e-09, - "loss": 0.4776, - "step": 16490 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.6111161222431837e-09, - "loss": 0.4489, - "step": 16491 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.57551337990558e-09, - "loss": 0.3828, - "step": 16492 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.540308392020684e-09, - "loss": 0.3723, - "step": 16493 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.505501159991818e-09, - "loss": 0.4455, - "step": 16494 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.4710916852023194e-09, - "loss": 0.4382, - "step": 16495 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.4370799690210934e-09, - "loss": 0.4423, - "step": 16496 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.403466012801502e-09, - "loss": 0.3828, - "step": 16497 - }, - { - "epoch": 2.98, - "grad_norm": 0.0, - "learning_rate": 1.3702498178802536e-09, - "loss": 0.484, - "step": 16498 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.3374313855785136e-09, - "loss": 0.4063, - "step": 16499 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.3050107172030146e-09, - "loss": 0.4328, - "step": 16500 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.2729878140438357e-09, - "loss": 0.4731, - "step": 16501 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.2413626773732924e-09, - "loss": 0.4465, - "step": 16502 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.2101353084492673e-09, - "loss": 0.3995, - "step": 16503 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.1793057085152104e-09, - "loss": 0.5169, - "step": 16504 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.148873878797918e-09, - "loss": 0.4474, - "step": 16505 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.1188398205075335e-09, - "loss": 0.3843, - "step": 16506 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.0892035348386564e-09, - "loss": 0.4209, - "step": 16507 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.059965022970344e-09, - "loss": 0.3736, - "step": 16508 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.0311242860649994e-09, - "loss": 0.3303, - "step": 16509 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.0026813252717037e-09, - "loss": 0.5313, - "step": 16510 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 9.746361417195538e-10, - "loss": 0.4199, - "step": 16511 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 9.469887365276542e-10, - "loss": 0.3527, - "step": 16512 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 9.197391107917952e-10, - "loss": 0.4475, - "step": 16513 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 8.928872655999954e-10, - "loss": 0.5038, - "step": 16514 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 8.664332020169586e-10, - "loss": 0.4524, - "step": 16515 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 8.403769210973966e-10, - "loss": 0.4916, - "step": 16516 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 8.147184238771477e-10, - "loss": 0.4748, - "step": 16517 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 7.894577113776169e-10, - "loss": 0.4177, - "step": 16518 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 7.64594784602446e-10, - "loss": 0.3828, - "step": 16519 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 7.401296445408435e-10, - "loss": 0.509, - "step": 16520 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 7.160622921675853e-10, - "loss": 0.3924, - "step": 16521 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 6.923927284374631e-10, - "loss": 0.449, - "step": 16522 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 6.69120954295277e-10, - "loss": 0.4509, - "step": 16523 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 6.462469706647323e-10, - "loss": 0.4044, - "step": 16524 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 6.237707784562119e-10, - "loss": 0.4998, - "step": 16525 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 6.016923785645556e-10, - "loss": 0.4483, - "step": 16526 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 5.800117718668397e-10, - "loss": 0.4762, - "step": 16527 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 5.587289592268174e-10, - "loss": 0.4292, - "step": 16528 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 5.378439414893688e-10, - "loss": 0.3773, - "step": 16529 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 5.17356719487161e-10, - "loss": 0.3612, - "step": 16530 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.972672940350976e-10, - "loss": 0.4307, - "step": 16531 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.77575665931429e-10, - "loss": 0.5085, - "step": 16532 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.582818359599728e-10, - "loss": 0.4561, - "step": 16533 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.3938580488789293e-10, - "loss": 0.4021, - "step": 16534 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.2088757346792076e-10, - "loss": 0.4776, - "step": 16535 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 4.0278714243502383e-10, - "loss": 0.4247, - "step": 16536 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.8508451250973687e-10, - "loss": 0.4114, - "step": 16537 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.6777968439594137e-10, - "loss": 0.4209, - "step": 16538 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.508726587819755e-10, - "loss": 0.4843, - "step": 16539 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.343634363406345e-10, - "loss": 0.4754, - "step": 16540 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.1825201772806016e-10, - "loss": 0.4008, - "step": 16541 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 3.0253840358707155e-10, - "loss": 0.3658, - "step": 16542 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.8722259454050385e-10, - "loss": 0.4097, - "step": 16543 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.723045911989797e-10, - "loss": 0.4007, - "step": 16544 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.577843941564684e-10, - "loss": 0.4127, - "step": 16545 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.4366200398917573e-10, - "loss": 0.5534, - "step": 16546 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.2993742125887454e-10, - "loss": 0.4552, - "step": 16547 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.1661064651290475e-10, - "loss": 0.4555, - "step": 16548 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 2.0368168028084279e-10, - "loss": 0.4027, - "step": 16549 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.9115052307672189e-10, - "loss": 0.4525, - "step": 16550 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.7901717539903218e-10, - "loss": 0.3958, - "step": 16551 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.672816377307207e-10, - "loss": 0.4184, - "step": 16552 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.5594391053919133e-10, - "loss": 0.4394, - "step": 16553 - }, - { - "epoch": 2.99, - "grad_norm": 0.0, - "learning_rate": 1.450039942740844e-10, - "loss": 0.4266, - "step": 16554 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.3446188937282777e-10, - "loss": 0.43, - "step": 16555 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.243175962517551e-10, - "loss": 0.3781, - "step": 16556 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.14571115317208e-10, - "loss": 0.4181, - "step": 16557 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.0522244695554407e-10, - "loss": 0.3457, - "step": 16558 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 9.627159153868804e-11, - "loss": 0.3519, - "step": 16559 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 8.771854942302149e-11, - "loss": 0.3586, - "step": 16560 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 7.956332094827268e-11, - "loss": 0.5268, - "step": 16561 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 7.180590643973695e-11, - "loss": 0.4212, - "step": 16562 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 6.444630620494608e-11, - "loss": 0.4385, - "step": 16563 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 5.7484520538109155e-11, - "loss": 0.4499, - "step": 16564 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 5.092054971567173e-11, - "loss": 0.4047, - "step": 16565 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 4.475439399742598e-11, - "loss": 0.544, - "step": 16566 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 3.898605363095165e-11, - "loss": 0.4408, - "step": 16567 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 3.3615528842734225e-11, - "loss": 0.4018, - "step": 16568 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 2.86428198492672e-11, - "loss": 0.3429, - "step": 16569 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 2.4067926848170274e-11, - "loss": 0.3985, - "step": 16570 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.989085001929958e-11, - "loss": 0.4124, - "step": 16571 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.611158953029879e-11, - "loss": 0.4357, - "step": 16572 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.2730145532158234e-11, - "loss": 0.3592, - "step": 16573 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 9.746518156994455e-12, - "loss": 0.3917, - "step": 16574 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 7.160707525821764e-12, - "loss": 0.4475, - "step": 16575 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 4.9727137407806765e-12, - "loss": 0.3631, - "step": 16576 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 3.1825368895788132e-12, - "loss": 0.4907, - "step": 16577 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.7901770421602238e-12, - "loss": 0.5203, - "step": 16578 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 7.956342540360595e-13, - "loss": 0.396, - "step": 16579 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 1.9890856517434943e-13, - "loss": 0.4152, - "step": 16580 - }, - { - "epoch": 3.0, - "grad_norm": 0.0, - "learning_rate": 0.0, - "loss": 0.3806, - "step": 16581 - } - ], - "logging_steps": 1.0, - "max_steps": 16581, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "total_flos": 2.0842876858632503e+19, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}